diff options
1059 files changed, 12164 insertions, 6578 deletions
@@ -226,6 +226,7 @@ Fangrui Song <i@maskray.me> <maskray@google.com> Felipe W Damasio <felipewd@terra.com.br> Felix Kuhling <fxkuehl@gmx.de> Felix Moeller <felix@derklecks.de> +Feng Tang <feng.79.tang@gmail.com> <feng.tang@intel.com> Fenglin Wu <quic_fenglinw@quicinc.com> <fenglinw@codeaurora.org> Filipe Lautert <filipe@icewall.org> Finn Thain <fthain@linux-m68k.org> <fthain@telegraphics.com.au> @@ -317,6 +318,8 @@ Jayachandran C <c.jayachandran@gmail.com> <jnair@caviumnetworks.com> Jean Tourrilhes <jt@hpl.hp.com> Jeevan Shriram <quic_jshriram@quicinc.com> <jshriram@codeaurora.org> Jeff Garzik <jgarzik@pretzel.yyz.us> +Jeff Johnson <jeff.johnson@oss.qualcomm.com> <jjohnson@codeaurora.org> +Jeff Johnson <jeff.johnson@oss.qualcomm.com> <quic_jjohnson@quicinc.com> Jeff Layton <jlayton@kernel.org> <jlayton@poochiereds.net> Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com> Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com> @@ -376,6 +379,7 @@ Juha Yrjola <juha.yrjola@solidboot.com> Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com> Iskren Chernev <me@iskren.info> <iskren.chernev@gmail.com> Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org> +Kalle Valo <kvalo@kernel.org> <quic_kvalo@quicinc.com> Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org> Karthikeyan Periyasamy <quic_periyasa@quicinc.com> <periyasa@codeaurora.org> Kathiravan T <quic_kathirav@quicinc.com> <kathirav@codeaurora.org> @@ -518,6 +522,7 @@ Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il> Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com> Naoya Horiguchi <nao.horiguchi@gmail.com> <n-horiguchi@ah.jp.nec.com> Naoya Horiguchi <nao.horiguchi@gmail.com> <naoya.horiguchi@nec.com> +Natalie Vock <natalie.vock@gmx.de> <friedrich.vock@gmx.de> Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com> Naveen N Rao <naveen@kernel.org> <naveen.n.rao@linux.ibm.com> Naveen N Rao <naveen@kernel.org> <naveen.n.rao@linux.vnet.ibm.com> @@ -530,6 +535,7 @@ Nicholas Piggin <npiggin@gmail.com> <npiggin@kernel.dk> Nicholas Piggin <npiggin@gmail.com> <npiggin@suse.de> Nicholas Piggin <npiggin@gmail.com> <nickpiggin@yahoo.com.au> Nicholas Piggin <npiggin@gmail.com> <piggin@cyberone.com.au> +Nick Desaulniers <nick.desaulniers+lkml@gmail.com> <ndesaulniers@google.com> Nicolas Ferre <nicolas.ferre@microchip.com> <nicolas.ferre@atmel.com> Nicolas Pitre <nico@fluxnic.net> <nicolas.pitre@linaro.org> Nicolas Pitre <nico@fluxnic.net> <nico@linaro.org> @@ -608,6 +614,8 @@ Richard Leitner <richard.leitner@linux.dev> <me@g0hl1n.net> Richard Leitner <richard.leitner@linux.dev> <richard.leitner@skidata.com> Robert Foss <rfoss@kernel.org> <robert.foss@linaro.org> Rocky Liao <quic_rjliao@quicinc.com> <rjliao@codeaurora.org> +Rodrigo Siqueira <siqueira@igalia.com> <rodrigosiqueiramelo@gmail.com> +Rodrigo Siqueira <siqueira@igalia.com> <Rodrigo.Siqueira@amd.com> Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com> Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com> Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru> @@ -2515,11 +2515,9 @@ D: SLS distribution D: Initial implementation of VC's, pty's and select() N: Pavel Machek -E: pavel@ucw.cz +E: pavel@kernel.org P: 4096R/92DFCE96 4FA7 9EEF FCD4 C44F C585 B8C7 C060 2241 92DF CE96 -D: Softcursor for vga, hypertech cdrom support, vcsa bugfix, nbd, -D: sun4/330 port, capabilities for elf, speedup for rm on ext2, USB, -D: work on suspend-to-ram/disk, killing duplicates from ioctl32, +D: NBD, Sun4/330 port, USB, work on suspend-to-ram/disk, D: Altera SoCFPGA and Nokia N900 support. S: Czech Republic diff --git a/Documentation/arch/arm64/gcs.rst b/Documentation/arch/arm64/gcs.rst index 1f65a3193e77..226c0b008456 100644 --- a/Documentation/arch/arm64/gcs.rst +++ b/Documentation/arch/arm64/gcs.rst @@ -37,7 +37,7 @@ intended to be exhaustive. shadow stacks rather than GCS. * Support for GCS is reported to userspace via HWCAP_GCS in the aux vector - AT_HWCAP2 entry. + AT_HWCAP entry. * GCS is enabled per thread. While there is support for disabling GCS at runtime this should be done with great care. diff --git a/Documentation/arch/powerpc/cxl.rst b/Documentation/arch/powerpc/cxl.rst index d2d77057610e..778adda740d2 100644 --- a/Documentation/arch/powerpc/cxl.rst +++ b/Documentation/arch/powerpc/cxl.rst @@ -18,6 +18,7 @@ Introduction both access system memory directly and with the same effective addresses. + **This driver is deprecated and will be removed in a future release.** Hardware overview ================= @@ -453,7 +454,7 @@ Sysfs Class A cxl sysfs class is added under /sys/class/cxl to facilitate enumeration and tuning of the accelerators. Its layout is - described in Documentation/ABI/testing/sysfs-class-cxl + described in Documentation/ABI/obsolete/sysfs-class-cxl Udev rules diff --git a/Documentation/arch/x86/sva.rst b/Documentation/arch/x86/sva.rst index 33cb05005982..6a759984d471 100644 --- a/Documentation/arch/x86/sva.rst +++ b/Documentation/arch/x86/sva.rst @@ -25,7 +25,7 @@ to cache translations for virtual addresses. The IOMMU driver uses the mmu_notifier() support to keep the device TLB cache and the CPU cache in sync. When an ATS lookup fails for a virtual address, the device should use the PRI in order to request the virtual address to be paged into the -CPU page tables. The device must use ATS again in order the fetch the +CPU page tables. The device must use ATS again in order to fetch the translation before use. Shared Hardware Workqueues @@ -216,7 +216,7 @@ submitting work and processing completions. Single Root I/O Virtualization (SR-IOV) focuses on providing independent hardware interfaces for virtualizing hardware. Hence, it's required to be -almost fully functional interface to software supporting the traditional +an almost fully functional interface to software supporting the traditional BARs, space for interrupts via MSI-X, its own register layout. Virtual Functions (VFs) are assisted by the Physical Function (PF) driver. diff --git a/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml b/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml index 932f981265cc..52016a141227 100644 --- a/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/rockchip/pmu.yaml @@ -53,11 +53,17 @@ properties: reg: maxItems: 1 + power-controller: + type: object + + reboot-mode: + type: object + required: - compatible - reg -additionalProperties: true +additionalProperties: false examples: - | diff --git a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml index 0858fd635282..4cdff6161bf0 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gpucc.yaml @@ -8,6 +8,7 @@ title: Qualcomm Graphics Clock & Reset Controller maintainers: - Taniya Das <quic_tdas@quicinc.com> + - Imran Shaik <quic_imrashai@quicinc.com> description: | Qualcomm graphics clock control module provides the clocks, resets and power @@ -23,10 +24,12 @@ description: | include/dt-bindings/clock/qcom,gpucc-sm8150.h include/dt-bindings/clock/qcom,gpucc-sm8250.h include/dt-bindings/clock/qcom,gpucc-sm8350.h + include/dt-bindings/clock/qcom,qcs8300-gpucc.h properties: compatible: enum: + - qcom,qcs8300-gpucc - qcom,sdm845-gpucc - qcom,sa8775p-gpucc - qcom,sc7180-gpucc diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml index 36a60d8f5ae3..81623f59d11d 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-camcc.yaml @@ -8,16 +8,20 @@ title: Qualcomm Camera Clock & Reset Controller on SA8775P maintainers: - Taniya Das <quic_tdas@quicinc.com> + - Imran Shaik <quic_imrashai@quicinc.com> description: | Qualcomm camera clock control module provides the clocks, resets and power domains on SA8775p. - See also: include/dt-bindings/clock/qcom,sa8775p-camcc.h + See also: + include/dt-bindings/clock/qcom,qcs8300-camcc.h + include/dt-bindings/clock/qcom,sa8775p-camcc.h properties: compatible: enum: + - qcom,qcs8300-camcc - qcom,sa8775p-camcc clocks: diff --git a/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml index 928131bff4c1..07e5d811d816 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sa8775p-videocc.yaml @@ -18,6 +18,7 @@ description: | properties: compatible: enum: + - qcom,qcs8300-videocc - qcom,sa8775p-videocc clocks: diff --git a/Documentation/devicetree/bindings/display/panel/powertip,hx8238a.yaml b/Documentation/devicetree/bindings/display/panel/powertip,hx8238a.yaml new file mode 100644 index 000000000000..b7d74faeb5d5 --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/powertip,hx8238a.yaml @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/powertip,hx8238a.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Powertip Electronic Technology Co. 320 x 240 LCD panel + +maintainers: + - Lukasz Majewski <lukma@denx.de> + +allOf: + - $ref: panel-dpi.yaml# + +properties: + compatible: + items: + - const: powertip,hx8238a + - {} # panel-dpi, but not listed here to avoid false select + + height-mm: true + panel-timing: true + port: true + power-supply: true + width-mm: true + +additionalProperties: false + +... diff --git a/Documentation/devicetree/bindings/display/panel/powertip,st7272.yaml b/Documentation/devicetree/bindings/display/panel/powertip,st7272.yaml new file mode 100644 index 000000000000..f3622800f13f --- /dev/null +++ b/Documentation/devicetree/bindings/display/panel/powertip,st7272.yaml @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/powertip,st7272.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Powertip Electronic Technology Co. 320 x 240 LCD panel + +maintainers: + - Lukasz Majewski <lukma@denx.de> + +allOf: + - $ref: panel-dpi.yaml# + +properties: + compatible: + items: + - const: powertip,st7272 + - {} # panel-dpi, but not listed here to avoid false select + + height-mm: true + panel-timing: true + port: true + power-supply: true + width-mm: true + +additionalProperties: false + +... diff --git a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml index 55e3e490d0e6..31c4ffcb599c 100644 --- a/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml +++ b/Documentation/devicetree/bindings/display/ti/ti,am65x-dss.yaml @@ -23,7 +23,7 @@ properties: compatible: enum: - ti,am625-dss - - ti,am62a7,dss + - ti,am62a7-dss - ti,am65x-dss reg: diff --git a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml index b2adc7174177..dca16e202da9 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml @@ -14,9 +14,8 @@ allOf: description: | The Microchip LAN966x outband interrupt controller (OIC) maps the internal - interrupt sources of the LAN966x device to an external interrupt. - When the LAN966x device is used as a PCI device, the external interrupt is - routed to the PCI interrupt. + interrupt sources of the LAN966x device to a PCI interrupt when the LAN966x + device is used as a PCI device. properties: compatible: diff --git a/Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml b/Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml index 0bed37a994c3..e1f4d7c35a88 100644 --- a/Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml +++ b/Documentation/devicetree/bindings/mtd/cdns,hp-nfc.yaml @@ -33,6 +33,10 @@ properties: clocks: maxItems: 1 + clock-names: + items: + - const: nf_clk + dmas: maxItems: 1 @@ -51,6 +55,7 @@ required: - reg-names - interrupts - clocks + - clock-names unevaluatedProperties: false @@ -66,7 +71,8 @@ examples: #address-cells = <1>; #size-cells = <0>; interrupts = <GIC_SPI 97 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&nf_clk>; + clocks = <&clk>; + clock-names = "nf_clk"; cdns,board-delay-ps = <4830>; nand@0 { diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml index 070c4c9b8643..aace072e2d52 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml @@ -7,7 +7,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies ath10k wireless devices maintainers: - - Kalle Valo <kvalo@kernel.org> - Jeff Johnson <jjohnson@kernel.org> description: diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml index a71fdf05bc1e..a4425cf196ab 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml @@ -8,7 +8,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies ath11k wireless devices (PCIe) maintainers: - - Kalle Valo <kvalo@kernel.org> - Jeff Johnson <jjohnson@kernel.org> description: | diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml index ff5763dc66a8..a69ffb7b3cb8 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml @@ -8,7 +8,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies ath11k wireless devices maintainers: - - Kalle Valo <kvalo@kernel.org> - Jeff Johnson <jjohnson@kernel.org> description: | diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml index cbfb559f6b69..318f305405e3 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml @@ -9,7 +9,6 @@ title: Qualcomm Technologies ath12k wireless devices (PCIe) with WSI interface maintainers: - Jeff Johnson <jjohnson@kernel.org> - - Kalle Valo <kvalo@kernel.org> description: | Qualcomm Technologies IEEE 802.11be PCIe devices with WSI interface. diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath12k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k.yaml index 1b5884015b15..9e557cb838c7 100644 --- a/Documentation/devicetree/bindings/net/wireless/qcom,ath12k.yaml +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k.yaml @@ -9,7 +9,6 @@ title: Qualcomm Technologies ath12k wireless devices (PCIe) maintainers: - Jeff Johnson <quic_jjohnson@quicinc.com> - - Kalle Valo <kvalo@kernel.org> description: Qualcomm Technologies IEEE 802.11be PCIe devices. diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml index d37f544ab8aa..39c209249c9c 100644 --- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml +++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml @@ -36,6 +36,7 @@ properties: - qcom,qcs404-qfprom - qcom,qcs615-qfprom - qcom,qcs8300-qfprom + - qcom,sar2130p-qfprom - qcom,sc7180-qfprom - qcom,sc7280-qfprom - qcom,sc8280xp-qfprom diff --git a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml index f2fd2df68a9e..b7241ce975b9 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml @@ -22,7 +22,7 @@ description: Each sub-node is identified using the node's name, with valid values listed for each of the pmics below. - For mp5496, s1, s2 + For mp5496, s1, s2, l2, l5 For pm2250, s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15, l16, l17, l18, l19, l20, l21, l22 diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst index 3d52dfdfa9fd..35e36fee4238 100644 --- a/Documentation/driver-api/infrastructure.rst +++ b/Documentation/driver-api/infrastructure.rst @@ -41,6 +41,12 @@ Device Drivers Base .. kernel-doc:: drivers/base/class.c :export: +.. kernel-doc:: include/linux/device/faux.h + :internal: + +.. kernel-doc:: drivers/base/faux.c + :export: + .. kernel-doc:: drivers/base/node.c :internal: diff --git a/Documentation/filesystems/bcachefs/SubmittingPatches.rst b/Documentation/filesystems/bcachefs/SubmittingPatches.rst new file mode 100644 index 000000000000..026b12ae0d6a --- /dev/null +++ b/Documentation/filesystems/bcachefs/SubmittingPatches.rst @@ -0,0 +1,98 @@ +Submitting patches to bcachefs: +=============================== + +Patches must be tested before being submitted, either with the xfstests suite +[0], or the full bcachefs test suite in ktest [1], depending on what's being +touched. Note that ktest wraps xfstests and will be an easier method to running +it for most users; it includes single-command wrappers for all the mainstream +in-kernel local filesystems. + +Patches will undergo more testing after being merged (including +lockdep/kasan/preempt/etc. variants), these are not generally required to be +run by the submitter - but do put some thought into what you're changing and +which tests might be relevant, e.g. are you dealing with tricky memory layout +work? kasan, are you doing locking work? then lockdep; and ktest includes +single-command variants for the debug build types you'll most likely need. + +The exception to this rule is incomplete WIP/RFC patches: if you're working on +something nontrivial, it's encouraged to send out a WIP patch to let people +know what you're doing and make sure you're on the right track. Just make sure +it includes a brief note as to what's done and what's incomplete, to avoid +confusion. + +Rigorous checkpatch.pl adherence is not required (many of its warnings are +considered out of date), but try not to deviate too much without reason. + +Focus on writing code that reads well and is organized well; code should be +aesthetically pleasing. + +CI: +=== + +Instead of running your tests locally, when running the full test suite it's +prefereable to let a server farm do it in parallel, and then have the results +in a nice test dashboard (which can tell you which failures are new, and +presents results in a git log view, avoiding the need for most bisecting). + +That exists [2], and community members may request an account. If you work for +a big tech company, you'll need to help out with server costs to get access - +but the CI is not restricted to running bcachefs tests: it runs any ktest test +(which generally makes it easy to wrap other tests that can run in qemu). + +Other things to think about: +============================ + +- How will we debug this code? Is there sufficient introspection to diagnose + when something starts acting wonky on a user machine? + + We don't necessarily need every single field of every data structure visible + with introspection, but having the important fields of all the core data + types wired up makes debugging drastically easier - a bit of thoughtful + foresight greatly reduces the need to have people build custom kernels with + debug patches. + + More broadly, think about all the debug tooling that might be needed. + +- Does it make the codebase more or less of a mess? Can we also try to do some + organizing, too? + +- Do new tests need to be written? New assertions? How do we know and verify + that the code is correct, and what happens if something goes wrong? + + We don't yet have automated code coverage analysis or easy fault injection - + but for now, pretend we did and ask what they might tell us. + + Assertions are hugely important, given that we don't yet have a systems + language that can do ergonomic embedded correctness proofs. Hitting an assert + in testing is much better than wandering off into undefined behaviour la-la + land - use them. Use them judiciously, and not as a replacement for proper + error handling, but use them. + +- Does it need to be performance tested? Should we add new peformance counters? + + bcachefs has a set of persistent runtime counters which can be viewed with + the 'bcachefs fs top' command; this should give users a basic idea of what + their filesystem is currently doing. If you're doing a new feature or looking + at old code, think if anything should be added. + +- If it's a new on disk format feature - have upgrades and downgrades been + tested? (Automated tests exists but aren't in the CI, due to the hassle of + disk image management; coordinate to have them run.) + +Mailing list, IRC: +================== + +Patches should hit the list [3], but much discussion and code review happens on +IRC as well [4]; many people appreciate the more conversational approach and +quicker feedback. + +Additionally, we have a lively user community doing excellent QA work, which +exists primarily on IRC. Please make use of that resource; user feedback is +important for any nontrivial feature, and documenting it in commit messages +would be a good idea. + +[0]: git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git +[1]: https://evilpiepirate.org/git/ktest.git/ +[2]: https://evilpiepirate.org/~testdashboard/ci/ +[3]: linux-bcachefs@vger.kernel.org +[4]: irc.oftc.net#bcache, #bcachefs-dev diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst index 95fc4b90739e..7db4d7ceab58 100644 --- a/Documentation/filesystems/bcachefs/index.rst +++ b/Documentation/filesystems/bcachefs/index.rst @@ -9,4 +9,5 @@ bcachefs Documentation :numbered: CodingStyle + SubmittingPatches errorcodes diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 259cb211a338..655d8d10fe24 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1524,7 +1524,8 @@ attribute-sets: nested-attributes: bitset - name: hwtstamp-flags - type: u32 + type: nest + nested-attributes: bitset operations: enum-model: directional diff --git a/Documentation/networking/iso15765-2.rst b/Documentation/networking/iso15765-2.rst index 0e9d96074178..37ebb2c417cb 100644 --- a/Documentation/networking/iso15765-2.rst +++ b/Documentation/networking/iso15765-2.rst @@ -369,8 +369,8 @@ to their default. addr.can_family = AF_CAN; addr.can_ifindex = if_nametoindex("can0"); - addr.tp.tx_id = 0x18DA42F1 | CAN_EFF_FLAG; - addr.tp.rx_id = 0x18DAF142 | CAN_EFF_FLAG; + addr.can_addr.tp.tx_id = 0x18DA42F1 | CAN_EFF_FLAG; + addr.can_addr.tp.rx_id = 0x18DAF142 | CAN_EFF_FLAG; ret = bind(s, (struct sockaddr *)&addr, sizeof(addr)); if (ret < 0) diff --git a/Documentation/networking/strparser.rst b/Documentation/networking/strparser.rst index 6cab1f74ae05..7f623d1db72a 100644 --- a/Documentation/networking/strparser.rst +++ b/Documentation/networking/strparser.rst @@ -112,7 +112,7 @@ Functions Callbacks ========= -There are six callbacks: +There are seven callbacks: :: @@ -184,6 +184,13 @@ There are six callbacks: :: + int (*read_sock)(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor); + + The read_sock callback is used by strparser instead of + sock->ops->read_sock, if provided. + :: + int (*read_sock_done)(struct strparser *strp, int err); read_sock_done is called when the stream parser is done reading diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index daebce49cfdf..0e19d2f0d6bb 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -308,7 +308,7 @@ an involved disclosed party. The current ambassadors list: Google Kees Cook <keescook@chromium.org> - LLVM Nick Desaulniers <ndesaulniers@google.com> + LLVM Nick Desaulniers <nick.desaulniers+lkml@gmail.com> ============= ======================================================== If you want your organization to be added to the ambassadors list, please diff --git a/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst b/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst index 7d4d694967c7..9d444b9c46d3 100644 --- a/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst +++ b/Documentation/translations/sp_SP/process/embargoed-hardware-issues.rst @@ -287,7 +287,7 @@ revelada involucrada. La lista de embajadores actuales: Google Kees Cook <keescook@chromium.org> - LLVM Nick Desaulniers <ndesaulniers@google.com> + LLVM Nick Desaulniers <nick.desaulniers+lkml@gmail.com> ============= ======================================================== Si quiere que su organización se añada a la lista de embajadores, por diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index d639c61cb472..ad587f53fe41 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -8,7 +8,7 @@ Landlock: unprivileged access control ===================================== :Author: Mickaël Salaün -:Date: October 2024 +:Date: January 2025 The goal of Landlock is to enable restriction of ambient rights (e.g. global filesystem or network access) for a set of processes. Because Landlock @@ -329,11 +329,11 @@ non-sandboxed process, we can specify this restriction with A sandboxed process can connect to a non-sandboxed process when its domain is not scoped. If a process's domain is scoped, it can only connect to sockets created by processes in the same scope. -Moreover, If a process is scoped to send signal to a non-scoped process, it can +Moreover, if a process is scoped to send signal to a non-scoped process, it can only send signals to processes in the same scope. A connected datagram socket behaves like a stream socket when its domain is -scoped, meaning if the domain is scoped after the socket is connected , it can +scoped, meaning if the domain is scoped after the socket is connected, it can still :manpage:`send(2)` data just like a stream socket. However, in the same scenario, a non-connected datagram socket cannot send data (with :manpage:`sendto(2)`) outside its scope. diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 0d1c3a820ce6..2b52eb77e29c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -1419,7 +1419,7 @@ fetch) is injected in the guest. S390: ^^^^^ -Returns -EINVAL if the VM has the KVM_VM_S390_UCONTROL flag set. +Returns -EINVAL or -EEXIST if the VM has the KVM_VM_S390_UCONTROL flag set. Returns -EINVAL if called on a protected VM. 4.36 KVM_SET_TSS_ADDR diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa065..8e0736dc2ee0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1046,14 +1046,14 @@ F: drivers/crypto/ccp/hsti.* AMD DISPLAY CORE M: Harry Wentland <harry.wentland@amd.com> M: Leo Li <sunpeng.li@amd.com> -M: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com> +R: Rodrigo Siqueira <siqueira@igalia.com> L: amd-gfx@lists.freedesktop.org S: Supported T: git https://gitlab.freedesktop.org/agd5f/linux.git F: drivers/gpu/drm/amd/display/ AMD DISPLAY CORE - DML -M: Chaitanya Dhere <chaitanya.dhere@amd.com> +M: Austin Zheng <austin.zheng@amd.com> M: Jun Lei <jun.lei@amd.com> S: Supported F: drivers/gpu/drm/amd/display/dc/dml/ @@ -2209,8 +2209,8 @@ F: sound/soc/codecs/cs42l84.* F: sound/soc/codecs/ssm3515.c ARM/APPLE MACHINE SUPPORT -M: Hector Martin <marcan@marcan.st> M: Sven Peter <sven@svenpeter.dev> +M: Janne Grunau <j@jannau.net> R: Alyssa Rosenzweig <alyssa@rosenzweig.io> L: asahi@lists.linux.dev L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -2285,7 +2285,7 @@ F: drivers/irqchip/irq-aspeed-i2c-ic.c ARM/ASPEED MACHINE SUPPORT M: Joel Stanley <joel@jms.id.au> -R: Andrew Jeffery <andrew@codeconstruct.com.au> +M: Andrew Jeffery <andrew@codeconstruct.com.au> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-aspeed@lists.ozlabs.org (moderated for non-subscribers) S: Supported @@ -2878,7 +2878,7 @@ F: drivers/pinctrl/nxp/ ARM/NXP S32G/S32R DWMAC ETHERNET DRIVER M: Jan Petrous <jan.petrous@oss.nxp.com> -L: NXP S32 Linux Team <s32@nxp.com> +R: s32@nxp.com S: Maintained F: Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml F: drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c @@ -3655,7 +3655,6 @@ F: Documentation/devicetree/bindings/phy/phy-ath79-usb.txt F: drivers/phy/qualcomm/phy-ath79-usb.c ATHEROS ATH GENERIC UTILITIES -M: Kalle Valo <kvalo@kernel.org> M: Jeff Johnson <jjohnson@kernel.org> L: linux-wireless@vger.kernel.org S: Supported @@ -3860,13 +3859,6 @@ W: https://ez.analog.com/linux-software-drivers F: Documentation/devicetree/bindings/pwm/adi,axi-pwmgen.yaml F: drivers/pwm/pwm-axi-pwmgen.c -AXXIA I2C CONTROLLER -M: Krzysztof Adamski <krzysztof.adamski@nokia.com> -L: linux-i2c@vger.kernel.org -S: Maintained -F: Documentation/devicetree/bindings/i2c/i2c-axxia.txt -F: drivers/i2c/busses/i2c-axxia.c - AZ6007 DVB DRIVER M: Mauro Carvalho Chehab <mchehab@kernel.org> L: linux-media@vger.kernel.org @@ -3955,6 +3947,7 @@ M: Kent Overstreet <kent.overstreet@linux.dev> L: linux-bcachefs@vger.kernel.org S: Supported C: irc://irc.oftc.net/bcache +P: Documentation/filesystems/bcachefs/SubmittingPatches.rst T: git https://evilpiepirate.org/git/bcachefs.git F: fs/bcachefs/ F: Documentation/filesystems/bcachefs/ @@ -5663,7 +5656,7 @@ F: .clang-format CLANG/LLVM BUILD SUPPORT M: Nathan Chancellor <nathan@kernel.org> -R: Nick Desaulniers <ndesaulniers@google.com> +R: Nick Desaulniers <nick.desaulniers+lkml@gmail.com> R: Bill Wendling <morbo@google.com> R: Justin Stitt <justinstitt@google.com> L: llvm@lists.linux.dev @@ -5863,7 +5856,6 @@ F: Documentation/security/snp-tdx-threat-model.rst CONFIGFS M: Joel Becker <jlbec@evilplan.org> -M: Christoph Hellwig <hch@lst.de> S: Supported T: git git://git.infradead.org/users/hch/configfs.git F: fs/configfs/ @@ -5934,6 +5926,17 @@ F: tools/testing/selftests/cgroup/test_cpuset.c F: tools/testing/selftests/cgroup/test_cpuset_prs.sh F: tools/testing/selftests/cgroup/test_cpuset_v1_base.sh +CONTROL GROUP - DEVICE MEMORY CONTROLLER (DMEM) +M: Maarten Lankhorst <dev@lankhorst.se> +M: Maxime Ripard <mripard@kernel.org> +M: Natalie Vock <natalie.vock@gmx.de> +L: cgroups@vger.kernel.org +L: dri-devel@lists.freedesktop.org +S: Maintained +T: git https://gitlab.freedesktop.org/drm/misc/kernel.git +F: include/linux/cgroup_dmem.h +F: kernel/cgroup/dmem.c + CONTROL GROUP - MEMORY RESOURCE CONTROLLER (MEMCG) M: Johannes Weiner <hannes@cmpxchg.org> M: Michal Hocko <mhocko@kernel.org> @@ -6886,7 +6889,6 @@ F: kernel/dma/map_benchmark.c F: tools/testing/selftests/dma/ DMA MAPPING HELPERS -M: Christoph Hellwig <hch@lst.de> M: Marek Szyprowski <m.szyprowski@samsung.com> R: Robin Murphy <robin.murphy@arm.com> L: iommu@lists.linux.dev @@ -7116,8 +7118,10 @@ F: rust/kernel/device.rs F: rust/kernel/device_id.rs F: rust/kernel/devres.rs F: rust/kernel/driver.rs +F: rust/kernel/faux.rs F: rust/kernel/platform.rs F: samples/rust/rust_driver_platform.rs +F: samples/rust/rust_driver_faux.rs DRIVERS FOR OMAP ADAPTIVE VOLTAGE SCALING (AVS) M: Nishanth Menon <nm@ti.com> @@ -7431,7 +7435,6 @@ F: Documentation/devicetree/bindings/display/panel/novatek,nt36672a.yaml F: drivers/gpu/drm/panel/panel-novatek-nt36672a.c DRM DRIVER FOR NVIDIA GEFORCE/QUADRO GPUS -M: Karol Herbst <kherbst@redhat.com> M: Lyude Paul <lyude@redhat.com> M: Danilo Krummrich <dakr@kernel.org> L: dri-devel@lists.freedesktop.org @@ -9418,7 +9421,7 @@ F: fs/freevxfs/ FREEZER M: "Rafael J. Wysocki" <rafael@kernel.org> -M: Pavel Machek <pavel@ucw.cz> +M: Pavel Machek <pavel@kernel.org> L: linux-pm@vger.kernel.org S: Supported F: Documentation/power/freezing-of-tasks.rst @@ -9835,8 +9838,7 @@ F: drivers/input/touchscreen/goodix* GOOGLE ETHERNET DRIVERS M: Jeroen de Borst <jeroendb@google.com> -M: Praveen Kaligineedi <pkaligineedi@google.com> -R: Shailend Chand <shailend@google.com> +M: Harshitha Ramamurthy <hramamurthy@google.com> L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/device_drivers/ethernet/google/gve.rst @@ -9878,7 +9880,7 @@ S: Maintained F: drivers/staging/gpib/ GPIO ACPI SUPPORT -M: Mika Westerberg <mika.westerberg@linux.intel.com> +M: Mika Westerberg <westeri@kernel.org> M: Andy Shevchenko <andriy.shevchenko@linux.intel.com> L: linux-gpio@vger.kernel.org L: linux-acpi@vger.kernel.org @@ -10253,7 +10255,7 @@ F: drivers/video/fbdev/hgafb.c HIBERNATION (aka Software Suspend, aka swsusp) M: "Rafael J. Wysocki" <rafael@kernel.org> -M: Pavel Machek <pavel@ucw.cz> +M: Pavel Machek <pavel@kernel.org> L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org @@ -10822,7 +10824,7 @@ S: Odd Fixes F: drivers/tty/hvc/ I2C ACPI SUPPORT -M: Mika Westerberg <mika.westerberg@linux.intel.com> +M: Mika Westerberg <westeri@kernel.org> L: linux-i2c@vger.kernel.org L: linux-acpi@vger.kernel.org S: Maintained @@ -13124,8 +13126,8 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har F: scripts/leaking_addresses.pl LED SUBSYSTEM -M: Pavel Machek <pavel@ucw.cz> M: Lee Jones <lee@kernel.org> +M: Pavel Machek <pavel@kernel.org> L: linux-leds@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/lee/leds.git @@ -15689,7 +15691,7 @@ F: include/uapi/linux/cciss*.h MICROSOFT MANA RDMA DRIVER M: Long Li <longli@microsoft.com> -M: Ajay Sharma <sharmaajay@microsoft.com> +M: Konstantin Taranov <kotaranov@microsoft.com> L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/mana/ @@ -16438,7 +16440,7 @@ X: drivers/net/can/ X: drivers/net/wireless/ NETWORKING DRIVERS (WIRELESS) -M: Kalle Valo <kvalo@kernel.org> +M: Johannes Berg <johannes@sipsolutions.net> L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/ @@ -16462,6 +16464,28 @@ F: include/net/dsa.h F: net/dsa/ F: tools/testing/selftests/drivers/net/dsa/ +NETWORKING [ETHTOOL] +M: Andrew Lunn <andrew@lunn.ch> +M: Jakub Kicinski <kuba@kernel.org> +F: Documentation/netlink/specs/ethtool.yaml +F: Documentation/networking/ethtool-netlink.rst +F: include/linux/ethtool* +F: include/uapi/linux/ethtool* +F: net/ethtool/ +F: tools/testing/selftests/drivers/net/*/ethtool* + +NETWORKING [ETHTOOL CABLE TEST] +M: Andrew Lunn <andrew@lunn.ch> +F: net/ethtool/cabletest.c +F: tools/testing/selftests/drivers/net/*/ethtool* +K: cable_test + +NETWORKING [ETHTOOL MAC MERGE] +M: Vladimir Oltean <vladimir.oltean@nxp.com> +F: net/ethtool/mm.c +F: tools/testing/selftests/drivers/net/hw/ethtool_mm.sh +K: ethtool_mm + NETWORKING [GENERAL] M: "David S. Miller" <davem@davemloft.net> M: Eric Dumazet <edumazet@google.com> @@ -16493,6 +16517,7 @@ F: include/linux/netdev* F: include/linux/netlink.h F: include/linux/netpoll.h F: include/linux/rtnetlink.h +F: include/linux/sctp.h F: include/linux/seq_file_net.h F: include/linux/skbuff* F: include/net/ @@ -16509,6 +16534,7 @@ F: include/uapi/linux/netdev* F: include/uapi/linux/netlink.h F: include/uapi/linux/netlink_diag.h F: include/uapi/linux/rtnetlink.h +F: include/uapi/linux/sctp.h F: lib/net_utils.c F: lib/random32.c F: net/ @@ -16621,6 +16647,7 @@ F: tools/testing/selftests/net/mptcp/ NETWORKING [TCP] M: Eric Dumazet <edumazet@google.com> M: Neal Cardwell <ncardwell@google.com> +R: Kuniyuki Iwashima <kuniyu@amazon.com> L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/net_cachelines/tcp_sock.rst @@ -16648,6 +16675,31 @@ F: include/net/tls.h F: include/uapi/linux/tls.h F: net/tls/* +NETWORKING [SOCKETS] +M: Eric Dumazet <edumazet@google.com> +M: Kuniyuki Iwashima <kuniyu@amazon.com> +M: Paolo Abeni <pabeni@redhat.com> +M: Willem de Bruijn <willemb@google.com> +S: Maintained +F: include/linux/sock_diag.h +F: include/linux/socket.h +F: include/linux/sockptr.h +F: include/net/sock.h +F: include/net/sock_reuseport.h +F: include/uapi/linux/socket.h +F: net/core/*sock* +F: net/core/scm.c +F: net/socket.c + +NETWORKING [UNIX SOCKETS] +M: Kuniyuki Iwashima <kuniyu@amazon.com> +S: Maintained +F: include/net/af_unix.h +F: include/net/netns/unix.h +F: include/uapi/linux/unix_diag.h +F: net/unix/ +F: tools/testing/selftests/net/af_unix/ + NETXEN (1/10) GbE SUPPORT M: Manish Chopra <manishc@marvell.com> M: Rahul Verma <rahulv@marvell.com> @@ -16781,7 +16833,7 @@ F: include/linux/tick.h F: kernel/time/tick*.* NOKIA N900 CAMERA SUPPORT (ET8EK8 SENSOR, AD5820 FOCUS) -M: Pavel Machek <pavel@ucw.cz> +M: Pavel Machek <pavel@kernel.org> M: Sakari Ailus <sakari.ailus@iki.fi> L: linux-media@vger.kernel.org S: Maintained @@ -17713,6 +17765,7 @@ L: netdev@vger.kernel.org L: dev@openvswitch.org S: Maintained W: http://openvswitch.org +F: Documentation/networking/openvswitch.rst F: include/uapi/linux/openvswitch.h F: net/openvswitch/ F: tools/testing/selftests/net/openvswitch/ @@ -19312,7 +19365,6 @@ Q: http://patchwork.linuxtv.org/project/linux-media/list/ F: drivers/media/tuners/qt1010* QUALCOMM ATH12K WIRELESS DRIVER -M: Kalle Valo <kvalo@kernel.org> M: Jeff Johnson <jjohnson@kernel.org> L: ath12k@lists.infradead.org S: Supported @@ -19322,7 +19374,6 @@ F: drivers/net/wireless/ath/ath12k/ N: ath12k QUALCOMM ATHEROS ATH10K WIRELESS DRIVER -M: Kalle Valo <kvalo@kernel.org> M: Jeff Johnson <jjohnson@kernel.org> L: ath10k@lists.infradead.org S: Supported @@ -19332,7 +19383,6 @@ F: drivers/net/wireless/ath/ath10k/ N: ath10k QUALCOMM ATHEROS ATH11K WIRELESS DRIVER -M: Kalle Valo <kvalo@kernel.org> M: Jeff Johnson <jjohnson@kernel.org> L: ath11k@lists.infradead.org S: Supported @@ -19467,6 +19517,15 @@ L: dmaengine@vger.kernel.org S: Supported F: drivers/dma/qcom/hidma* +QUALCOMM I2C QCOM GENI DRIVER +M: Mukesh Kumar Savaliya <quic_msavaliy@quicinc.com> +M: Viken Dadhaniya <quic_vdadhani@quicinc.com> +L: linux-i2c@vger.kernel.org +L: linux-arm-msm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml +F: drivers/i2c/busses/i2c-qcom-geni.c + QUALCOMM I2C CCI DRIVER M: Loic Poulain <loic.poulain@linaro.org> M: Robert Foss <rfoss@kernel.org> @@ -19607,7 +19666,6 @@ F: drivers/net/wireless/quantenna RADEON and AMDGPU DRM DRIVERS M: Alex Deucher <alexander.deucher@amd.com> M: Christian König <christian.koenig@amd.com> -M: Xinhui Pan <Xinhui.Pan@amd.com> L: amd-gfx@lists.freedesktop.org S: Supported B: https://gitlab.freedesktop.org/drm/amd/-/issues @@ -19829,7 +19887,7 @@ F: net/rds/ F: tools/testing/selftests/net/rds/ RDT - RESOURCE ALLOCATION -M: Fenghua Yu <fenghua.yu@intel.com> +M: Tony Luck <tony.luck@intel.com> M: Reinette Chatre <reinette.chatre@intel.com> L: linux-kernel@vger.kernel.org S: Supported @@ -20280,6 +20338,7 @@ RISC-V ARCHITECTURE M: Paul Walmsley <paul.walmsley@sifive.com> M: Palmer Dabbelt <palmer@dabbelt.com> M: Albert Ou <aou@eecs.berkeley.edu> +R: Alexandre Ghiti <alex@ghiti.fr> L: linux-riscv@lists.infradead.org S: Supported Q: https://patchwork.kernel.org/project/linux-riscv/list/ @@ -21873,10 +21932,13 @@ F: sound/soc/uniphier/ SOCKET TIMESTAMPING M: Willem de Bruijn <willemdebruijn.kernel@gmail.com> +R: Jason Xing <kernelxing@tencent.com> S: Maintained F: Documentation/networking/timestamping.rst F: include/linux/net_tstamp.h F: include/uapi/linux/net_tstamp.h +F: tools/testing/selftests/bpf/*/net_timestamping* +F: tools/testing/selftests/net/*timestamp* F: tools/testing/selftests/net/so_txtime.c SOEKRIS NET48XX LED SUPPORT @@ -22806,7 +22868,7 @@ F: drivers/sh/ SUSPEND TO RAM M: "Rafael J. Wysocki" <rafael@kernel.org> M: Len Brown <len.brown@intel.com> -M: Pavel Machek <pavel@ucw.cz> +M: Pavel Machek <pavel@kernel.org> L: linux-pm@vger.kernel.org S: Supported B: https://bugzilla.kernel.org @@ -24019,7 +24081,6 @@ F: tools/testing/selftests/ftrace/ TRACING MMIO ACCESSES (MMIOTRACE) M: Steven Rostedt <rostedt@goodmis.org> M: Masami Hiramatsu <mhiramat@kernel.org> -R: Karol Herbst <karolherbst@gmail.com> R: Pekka Paalanen <ppaalanen@gmail.com> L: linux-kernel@vger.kernel.org L: nouveau@lists.freedesktop.org @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -1120,8 +1120,8 @@ LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif # Align the bit size of userspace programs with the kernel -KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) -KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS)) +KBUILD_USERCFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) +KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) # make the checker run with the right architecture CHECKFLAGS += --arch=$(ARCH) @@ -1421,18 +1421,13 @@ ifneq ($(wildcard $(resolve_btfids_O)),) $(Q)$(MAKE) -sC $(srctree)/tools/bpf/resolve_btfids O=$(resolve_btfids_O) clean endif -# Clear a bunch of variables before executing the submake -ifeq ($(quiet),silent_) -tools_silent=s -endif - tools/: FORCE $(Q)mkdir -p $(objtree)/tools - $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ + $(Q)$(MAKE) LDFLAGS= O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ tools/%: FORCE $(Q)mkdir -p $(objtree)/tools - $(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(tools_silent) $(filter --j% -j,$(MAKEFLAGS))" O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ $* + $(Q)$(MAKE) LDFLAGS= O=$(abspath $(objtree)) subdir=tools -C $(srctree)/tools/ $* # --------------------------------------------------------------------------- # Kernel selftest diff --git a/arch/alpha/include/asm/elf.h b/arch/alpha/include/asm/elf.h index 4d7c46f50382..50c82187e60e 100644 --- a/arch/alpha/include/asm/elf.h +++ b/arch/alpha/include/asm/elf.h @@ -74,7 +74,7 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; /* * This is used to ensure we don't load something for the wrong architecture. */ -#define elf_check_arch(x) ((x)->e_machine == EM_ALPHA) +#define elf_check_arch(x) (((x)->e_machine == EM_ALPHA) && !((x)->e_flags & EF_ALPHA_32BIT)) /* * These are used to set parameters in the core dumps. @@ -137,10 +137,6 @@ extern int dump_elf_task(elf_greg_t *dest, struct task_struct *task); : amask (AMASK_CIX) ? "ev6" : "ev67"); \ }) -#define SET_PERSONALITY(EX) \ - set_personality(((EX).e_flags & EF_ALPHA_32BIT) \ - ? PER_LINUX_32BIT : PER_LINUX) - extern int alpha_l1i_cacheshape; extern int alpha_l1d_cacheshape; extern int alpha_l2_cacheshape; diff --git a/arch/alpha/include/asm/hwrpb.h b/arch/alpha/include/asm/hwrpb.h index fc76f36265ad..db831cf8de10 100644 --- a/arch/alpha/include/asm/hwrpb.h +++ b/arch/alpha/include/asm/hwrpb.h @@ -135,7 +135,7 @@ struct crb_struct { /* virtual->physical map */ unsigned long map_entries; unsigned long map_pages; - struct vf_map_struct map[1]; + struct vf_map_struct map[]; }; struct memclust_struct { diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 635f0a5f5bbd..02e8817a8921 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -360,7 +360,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) extern void paging_init(void); -/* We have our own get_unmapped_area to cope with ADDR_LIMIT_32BIT. */ +/* We have our own get_unmapped_area */ #define HAVE_ARCH_UNMAPPED_AREA #endif /* _ALPHA_PGTABLE_H */ diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 55bb1c09fd39..5dce5518a211 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -8,23 +8,19 @@ #ifndef __ASM_ALPHA_PROCESSOR_H #define __ASM_ALPHA_PROCESSOR_H -#include <linux/personality.h> /* for ADDR_LIMIT_32BIT */ - /* * We have a 42-bit user address space: 4TB user VM... */ #define TASK_SIZE (0x40000000000UL) -#define STACK_TOP \ - (current->personality & ADDR_LIMIT_32BIT ? 0x80000000 : 0x00120000000UL) +#define STACK_TOP (0x00120000000UL) #define STACK_TOP_MAX 0x00120000000UL /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ -#define TASK_UNMAPPED_BASE \ - ((current->personality & ADDR_LIMIT_32BIT) ? 0x40000000 : TASK_SIZE / 2) +#define TASK_UNMAPPED_BASE (TASK_SIZE / 2) /* This is dead. Everything has been moved to thread_info. */ struct thread_struct { }; diff --git a/arch/alpha/include/uapi/asm/ptrace.h b/arch/alpha/include/uapi/asm/ptrace.h index 5ca45934fcbb..72ed913a910f 100644 --- a/arch/alpha/include/uapi/asm/ptrace.h +++ b/arch/alpha/include/uapi/asm/ptrace.h @@ -42,6 +42,8 @@ struct pt_regs { unsigned long trap_a0; unsigned long trap_a1; unsigned long trap_a2; +/* This makes the stack 16-byte aligned as GCC expects */ + unsigned long __pad0; /* These are saved by PAL-code: */ unsigned long ps; unsigned long pc; diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c index 4cfeae42c79a..e9dad60b147f 100644 --- a/arch/alpha/kernel/asm-offsets.c +++ b/arch/alpha/kernel/asm-offsets.c @@ -19,9 +19,13 @@ static void __used foo(void) DEFINE(TI_STATUS, offsetof(struct thread_info, status)); BLANK(); + DEFINE(SP_OFF, offsetof(struct pt_regs, ps)); DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs)); BLANK(); + DEFINE(SWITCH_STACK_SIZE, sizeof(struct switch_stack)); + BLANK(); + DEFINE(HAE_CACHE, offsetof(struct alpha_machine_vector, hae_cache)); DEFINE(HAE_REG, offsetof(struct alpha_machine_vector, hae_register)); } diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index dd26062d75b3..f4d41b4538c2 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -15,10 +15,6 @@ .set noat .cfi_sections .debug_frame -/* Stack offsets. */ -#define SP_OFF 184 -#define SWITCH_STACK_SIZE 64 - .macro CFI_START_OSF_FRAME func .align 4 .globl \func @@ -198,8 +194,8 @@ CFI_END_OSF_FRAME entArith CFI_START_OSF_FRAME entMM SAVE_ALL /* save $9 - $15 so the inline exception code can manipulate them. */ - subq $sp, 56, $sp - .cfi_adjust_cfa_offset 56 + subq $sp, 64, $sp + .cfi_adjust_cfa_offset 64 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -214,7 +210,7 @@ CFI_START_OSF_FRAME entMM .cfi_rel_offset $13, 32 .cfi_rel_offset $14, 40 .cfi_rel_offset $15, 48 - addq $sp, 56, $19 + addq $sp, 64, $19 /* handle the fault */ lda $8, 0x3fff bic $sp, $8, $8 @@ -227,7 +223,7 @@ CFI_START_OSF_FRAME entMM ldq $13, 32($sp) ldq $14, 40($sp) ldq $15, 48($sp) - addq $sp, 56, $sp + addq $sp, 64, $sp .cfi_restore $9 .cfi_restore $10 .cfi_restore $11 @@ -235,7 +231,7 @@ CFI_START_OSF_FRAME entMM .cfi_restore $13 .cfi_restore $14 .cfi_restore $15 - .cfi_adjust_cfa_offset -56 + .cfi_adjust_cfa_offset -64 /* finish up the syscall as normal. */ br ret_from_sys_call CFI_END_OSF_FRAME entMM @@ -382,8 +378,8 @@ entUnaUser: .cfi_restore $0 .cfi_adjust_cfa_offset -256 SAVE_ALL /* setup normal kernel stack */ - lda $sp, -56($sp) - .cfi_adjust_cfa_offset 56 + lda $sp, -64($sp) + .cfi_adjust_cfa_offset 64 stq $9, 0($sp) stq $10, 8($sp) stq $11, 16($sp) @@ -399,7 +395,7 @@ entUnaUser: .cfi_rel_offset $14, 40 .cfi_rel_offset $15, 48 lda $8, 0x3fff - addq $sp, 56, $19 + addq $sp, 64, $19 bic $sp, $8, $8 jsr $26, do_entUnaUser ldq $9, 0($sp) @@ -409,7 +405,7 @@ entUnaUser: ldq $13, 32($sp) ldq $14, 40($sp) ldq $15, 48($sp) - lda $sp, 56($sp) + lda $sp, 64($sp) .cfi_restore $9 .cfi_restore $10 .cfi_restore $11 @@ -417,7 +413,7 @@ entUnaUser: .cfi_restore $13 .cfi_restore $14 .cfi_restore $15 - .cfi_adjust_cfa_offset -56 + .cfi_adjust_cfa_offset -64 br ret_from_sys_call CFI_END_OSF_FRAME entUna diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 86185021f75a..a08e8edef1a4 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1210,8 +1210,7 @@ SYSCALL_DEFINE1(old_adjtimex, struct timex32 __user *, txc_p) return ret; } -/* Get an address range which is currently unmapped. Similar to the - generic version except that we know how to honor ADDR_LIMIT_32BIT. */ +/* Get an address range which is currently unmapped. */ static unsigned long arch_get_unmapped_area_1(unsigned long addr, unsigned long len, @@ -1230,13 +1229,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags) { - unsigned long limit; - - /* "32 bit" actually means 31 bit, since pointers sign extend. */ - if (current->personality & ADDR_LIMIT_32BIT) - limit = 0x80000000; - else - limit = TASK_SIZE; + unsigned long limit = TASK_SIZE; if (len > limit) return -ENOMEM; diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 681f56089d9c..dc91de50f906 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -13,6 +13,7 @@ #include <linux/log2.h> #include <linux/dma-map-ops.h> #include <linux/iommu-helper.h> +#include <linux/string_choices.h> #include <asm/io.h> #include <asm/hwrpb.h> @@ -212,7 +213,7 @@ static int pci_dac_dma_supported(struct pci_dev *dev, u64 mask) /* If both conditions above are met, we are fine. */ DBGA("pci_dac_dma_supported %s from %ps\n", - ok ? "yes" : "no", __builtin_return_address(0)); + str_yes_no(ok), __builtin_return_address(0)); return ok; } diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index a9a38c80c4a7..7004397937cf 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -649,7 +649,7 @@ s_reg_to_mem (unsigned long s_reg) static int unauser_reg_offsets[32] = { R(r0), R(r1), R(r2), R(r3), R(r4), R(r5), R(r6), R(r7), R(r8), /* r9 ... r15 are stored in front of regs. */ - -56, -48, -40, -32, -24, -16, -8, + -64, -56, -48, -40, -32, -24, -16, /* padding at -8 */ R(r16), R(r17), R(r18), R(r19), R(r20), R(r21), R(r22), R(r23), R(r24), R(r25), R(r26), R(r27), R(r28), R(gp), diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 8c9850437e67..a9816bbc9f34 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -78,8 +78,8 @@ __load_new_mm_context(struct mm_struct *next_mm) /* Macro for exception fixup code to access integer registers. */ #define dpf_reg(r) \ - (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 : \ - (r) <= 18 ? (r)+10 : (r)-10]) + (((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-17 : \ + (r) <= 18 ? (r)+11 : (r)-10]) asmlinkage void do_page_fault(unsigned long address, unsigned long mmcsr, diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fcdd0ed3eca8..940343beb3d4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -225,7 +225,6 @@ config ARM64 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_FREGS select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_GRAPH_RETVAL select HAVE_GCC_PLUGINS select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && \ HW_PERF_EVENTS && HAVE_PERF_EVENTS_NMI diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 358c68565bfd..2b25d671365f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -48,7 +48,11 @@ KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) \ KBUILD_CFLAGS += $(call cc-disable-warning, psabi) KBUILD_AFLAGS += $(compat_vdso) +ifeq ($(call test-ge, $(CONFIG_RUSTC_VERSION), 108500),y) +KBUILD_RUSTFLAGS += --target=aarch64-unknown-none-softfloat +else KBUILD_RUSTFLAGS += --target=aarch64-unknown-none -Ctarget-feature="-neon" +endif KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts index e4517f47d519..eb9470a00e54 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts @@ -226,7 +226,6 @@ }; &uart5 { - pinctrl-0 = <&uart5_xfer>; rts-gpios = <&gpio0 RK_PB5 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi index ae050cc6cd05..e80412abec08 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck.dtsi @@ -396,6 +396,12 @@ status = "okay"; }; +&uart5 { + /delete-property/ dmas; + /delete-property/ dma-names; + pinctrl-0 = <&uart5_xfer>; +}; + /* Mule UCAN */ &usb_host0_ehci { status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts index 67c246ad8b8c..ec2ce894da1f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus-lts.dts @@ -17,8 +17,7 @@ &gmac2io { phy-handle = <&yt8531c>; - tx_delay = <0x19>; - rx_delay = <0x05>; + phy-mode = "rgmii-id"; status = "okay"; mdio { diff --git a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dts b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dts index 324a8e951f7e..846b931e16d2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dts @@ -15,6 +15,7 @@ &gmac2io { phy-handle = <&rtl8211e>; + phy-mode = "rgmii"; tx_delay = <0x24>; rx_delay = <0x18>; status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dtsi b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dtsi index 4f193704e5dc..09508e324a28 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328-orangepi-r1-plus.dtsi @@ -109,7 +109,6 @@ assigned-clocks = <&cru SCLK_MAC2IO>, <&cru SCLK_MAC2IO_EXT>; assigned-clock-parents = <&gmac_clk>, <&gmac_clk>; clock_in_out = "input"; - phy-mode = "rgmii"; phy-supply = <&vcc_io>; pinctrl-0 = <&rgmiim1_pins>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi index 988e6ca32fac..a9ea4b0daa04 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi @@ -22,11 +22,11 @@ }; /* EC turns on w/ pp900_usb_en */ - pp900_usb: pp900-ap { + pp900_usb: regulator-pp900-ap { }; /* EC turns on w/ pp900_pcie_en */ - pp900_pcie: pp900-ap { + pp900_pcie: regulator-pp900-ap { }; pp3000: regulator-pp3000 { @@ -126,7 +126,7 @@ }; /* Always on; plain and simple */ - pp3000_ap: pp3000_emmc: pp3000 { + pp3000_ap: pp3000_emmc: regulator-pp3000 { }; pp1500_ap_io: regulator-pp1500-ap-io { @@ -160,7 +160,7 @@ }; /* EC turns on w/ pp3300_usb_en_l */ - pp3300_usb: pp3300 { + pp3300_usb: regulator-pp3300 { }; /* gpio is shared with pp1800_pcie and pinctrl is set there */ diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi index 19b23b438965..5e068377a0a2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet.dtsi @@ -92,7 +92,7 @@ }; /* EC turns on pp1800_s3_en */ - pp1800_s3: pp1800 { + pp1800_s3: regulator-pp1800 { }; /* pp3300 children, sorted by name */ @@ -109,11 +109,11 @@ }; /* EC turns on pp3300_s0_en */ - pp3300_s0: pp3300 { + pp3300_s0: regulator-pp3300 { }; /* EC turns on pp3300_s3_en */ - pp3300_s3: pp3300 { + pp3300_s3: regulator-pp3300 { }; /* diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 6d9e60b01225..7eca1da78cff 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -189,39 +189,39 @@ }; /* EC turns on w/ pp900_ddrpll_en */ - pp900_ddrpll: pp900-ap { + pp900_ddrpll: regulator-pp900-ap { }; /* EC turns on w/ pp900_pll_en */ - pp900_pll: pp900-ap { + pp900_pll: regulator-pp900-ap { }; /* EC turns on w/ pp900_pmu_en */ - pp900_pmu: pp900-ap { + pp900_pmu: regulator-pp900-ap { }; /* EC turns on w/ pp1800_s0_en_l */ - pp1800_ap_io: pp1800_emmc: pp1800_nfc: pp1800_s0: pp1800 { + pp1800_ap_io: pp1800_emmc: pp1800_nfc: pp1800_s0: regulator-pp1800 { }; /* EC turns on w/ pp1800_avdd_en_l */ - pp1800_avdd: pp1800 { + pp1800_avdd: regulator-pp1800 { }; /* EC turns on w/ pp1800_lid_en_l */ - pp1800_lid: pp1800_mic: pp1800 { + pp1800_lid: pp1800_mic: regulator-pp1800 { }; /* EC turns on w/ lpddr_pwr_en */ - pp1800_lpddr: pp1800 { + pp1800_lpddr: regulator-pp1800 { }; /* EC turns on w/ pp1800_pmu_en_l */ - pp1800_pmu: pp1800 { + pp1800_pmu: regulator-pp1800 { }; /* EC turns on w/ pp1800_usb_en_l */ - pp1800_usb: pp1800 { + pp1800_usb: regulator-pp1800 { }; pp3000_sd_slot: regulator-pp3000-sd-slot { @@ -259,11 +259,11 @@ }; /* EC turns on w/ pp3300_trackpad_en_l */ - pp3300_trackpad: pp3300-trackpad { + pp3300_trackpad: regulator-pp3300-trackpad { }; /* EC turns on w/ usb_a_en */ - pp5000_usb_a_vbus: pp5000 { + pp5000_usb_a_vbus: regulator-pp5000 { }; ap_rtc_clk: ap-rtc-clk { diff --git a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi index 8cfa30837ce7..c3abdfb04f8f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-base.dtsi @@ -549,10 +549,10 @@ mmu600_pcie: iommu@fc900000 { compatible = "arm,smmu-v3"; reg = <0x0 0xfc900000 0x0 0x200000>; - interrupts = <GIC_SPI 369 IRQ_TYPE_LEVEL_HIGH 0>, - <GIC_SPI 371 IRQ_TYPE_LEVEL_HIGH 0>, - <GIC_SPI 374 IRQ_TYPE_LEVEL_HIGH 0>, - <GIC_SPI 367 IRQ_TYPE_LEVEL_HIGH 0>; + interrupts = <GIC_SPI 369 IRQ_TYPE_EDGE_RISING 0>, + <GIC_SPI 371 IRQ_TYPE_EDGE_RISING 0>, + <GIC_SPI 374 IRQ_TYPE_EDGE_RISING 0>, + <GIC_SPI 367 IRQ_TYPE_EDGE_RISING 0>; interrupt-names = "eventq", "gerror", "priq", "cmdq-sync"; #iommu-cells = <1>; }; @@ -560,10 +560,10 @@ mmu600_php: iommu@fcb00000 { compatible = "arm,smmu-v3"; reg = <0x0 0xfcb00000 0x0 0x200000>; - interrupts = <GIC_SPI 381 IRQ_TYPE_LEVEL_HIGH 0>, - <GIC_SPI 383 IRQ_TYPE_LEVEL_HIGH 0>, - <GIC_SPI 386 IRQ_TYPE_LEVEL_HIGH 0>, - <GIC_SPI 379 IRQ_TYPE_LEVEL_HIGH 0>; + interrupts = <GIC_SPI 381 IRQ_TYPE_EDGE_RISING 0>, + <GIC_SPI 383 IRQ_TYPE_EDGE_RISING 0>, + <GIC_SPI 386 IRQ_TYPE_EDGE_RISING 0>, + <GIC_SPI 379 IRQ_TYPE_EDGE_RISING 0>; interrupt-names = "eventq", "gerror", "priq", "cmdq-sync"; #iommu-cells = <1>; status = "disabled"; @@ -2668,9 +2668,9 @@ rockchip,hw-tshut-temp = <120000>; rockchip,hw-tshut-mode = <0>; /* tshut mode 0:CRU 1:GPIO */ rockchip,hw-tshut-polarity = <0>; /* tshut polarity 0:LOW 1:HIGH */ - pinctrl-0 = <&tsadc_gpio_func>; - pinctrl-1 = <&tsadc_shut>; - pinctrl-names = "gpio", "otpout"; + pinctrl-0 = <&tsadc_shut_org>; + pinctrl-1 = <&tsadc_gpio_func>; + pinctrl-names = "default", "sleep"; #thermal-sensor-cells = <1>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-genbook.dts b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-genbook.dts index 92f0ed83c990..bc6b43a77153 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-genbook.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-coolpi-cm5-genbook.dts @@ -113,7 +113,7 @@ compatible = "regulator-fixed"; regulator-name = "vcc3v3_lcd"; enable-active-high; - gpio = <&gpio1 RK_PC4 GPIO_ACTIVE_HIGH>; + gpio = <&gpio0 RK_PC4 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&lcdpwr_en>; vin-supply = <&vcc3v3_sys>; @@ -241,7 +241,7 @@ &pinctrl { lcd { lcdpwr_en: lcdpwr-en { - rockchip,pins = <1 RK_PC4 RK_FUNC_GPIO &pcfg_pull_down>; + rockchip,pins = <0 RK_PC4 RK_FUNC_GPIO &pcfg_pull_down>; }; bl_en: bl-en { diff --git a/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi index 4a950907ea6f..840b638af1c2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-extra.dtsi @@ -213,7 +213,6 @@ interrupt-names = "sys", "pmc", "msg", "legacy", "err", "dma0", "dma1", "dma2", "dma3"; max-link-speed = <3>; - iommus = <&mmu600_pcie 0x0000>; num-lanes = <4>; phys = <&pcie30phy>; phy-names = "pcie-phy"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b-pcie-ep.dtso b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b-pcie-ep.dtso index 672d748fcc67..f229cb49da68 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b-pcie-ep.dtso +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b-pcie-ep.dtso @@ -23,3 +23,7 @@ vpcie3v3-supply = <&vcc3v3_pcie30>; status = "okay"; }; + +&mmu600_pcie { + status = "disabled"; +}; diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index cb7da4415599..1f25423de383 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1551,6 +1551,8 @@ CONFIG_PWM_VISCONTI=m CONFIG_SL28CPLD_INTC=y CONFIG_QCOM_PDC=y CONFIG_QCOM_MPM=y +CONFIG_TI_SCI_INTR_IRQCHIP=y +CONFIG_TI_SCI_INTA_IRQCHIP=y CONFIG_RESET_GPIO=m CONFIG_RESET_IMX7=y CONFIG_RESET_QCOM_AOSS=y diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index c6dff3e69539..07fbf5bf85a7 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -42,8 +42,8 @@ extern int huge_ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t pte, int dirty); #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); +extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT extern void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep); @@ -76,12 +76,22 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma, { unsigned long stride = huge_page_size(hstate_vma(vma)); - if (stride == PMD_SIZE) - __flush_tlb_range(vma, start, end, stride, false, 2); - else if (stride == PUD_SIZE) - __flush_tlb_range(vma, start, end, stride, false, 1); - else - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + switch (stride) { +#ifndef __PAGETABLE_PMD_FOLDED + case PUD_SIZE: + __flush_tlb_range(vma, start, end, PUD_SIZE, false, 1); + break; +#endif + case CONT_PMD_SIZE: + case PMD_SIZE: + __flush_tlb_range(vma, start, end, PMD_SIZE, false, 2); + break; + case CONT_PTE_SIZE: + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 3); + break; + default: + __flush_tlb_range(vma, start, end, PAGE_SIZE, false, TLBI_TTL_UNKNOWN); + } } #endif /* __ASM_HUGETLB_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8d94a6c0ed5c..c2417a424b98 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -119,7 +119,7 @@ #define TCR_EL2_IRGN0_MASK TCR_IRGN0_MASK #define TCR_EL2_T0SZ_MASK 0x3f #define TCR_EL2_MASK (TCR_EL2_TG0_MASK | TCR_EL2_SH0_MASK | \ - TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK | TCR_EL2_T0SZ_MASK) + TCR_EL2_ORGN0_MASK | TCR_EL2_IRGN0_MASK) /* VTCR_EL2 Registers bits */ #define VTCR_EL2_DS TCR_EL2_DS diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 47f2cf408eed..78ec1ef2cfe8 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -605,48 +605,6 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu) __cpacr_to_cptr_set(clr, set));\ } while (0) -static __always_inline void kvm_write_cptr_el2(u64 val) -{ - if (has_vhe() || has_hvhe()) - write_sysreg(val, cpacr_el1); - else - write_sysreg(val, cptr_el2); -} - -/* Resets the value of cptr_el2 when returning to the host. */ -static __always_inline void __kvm_reset_cptr_el2(struct kvm *kvm) -{ - u64 val; - - if (has_vhe()) { - val = (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN); - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN_EL1EN; - } else if (has_hvhe()) { - val = CPACR_EL1_FPEN; - - if (!kvm_has_sve(kvm) || !guest_owns_fp_regs()) - val |= CPACR_EL1_ZEN; - if (cpus_have_final_cap(ARM64_SME)) - val |= CPACR_EL1_SMEN; - } else { - val = CPTR_NVHE_EL2_RES1; - - if (kvm_has_sve(kvm) && guest_owns_fp_regs()) - val |= CPTR_EL2_TZ; - if (!cpus_have_final_cap(ARM64_SME)) - val |= CPTR_EL2_TSM; - } - - kvm_write_cptr_el2(val); -} - -#ifdef __KVM_NVHE_HYPERVISOR__ -#define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2(kern_hyp_va((v)->kvm)) -#else -#define kvm_reset_cptr_el2(v) __kvm_reset_cptr_el2((v)->kvm) -#endif - /* * Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE * format if E2H isn't set. diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 7cfa024de4e3..d919557af5e5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -100,7 +100,7 @@ static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc, void *(*to_va)(phys_addr_t phys)) { - phys_addr_t *p = to_va(mc->head); + phys_addr_t *p = to_va(mc->head & PAGE_MASK); if (!mc->nr_pages) return NULL; @@ -615,8 +615,6 @@ struct cpu_sve_state { struct kvm_host_data { #define KVM_HOST_DATA_FLAG_HAS_SPE 0 #define KVM_HOST_DATA_FLAG_HAS_TRBE 1 -#define KVM_HOST_DATA_FLAG_HOST_SVE_ENABLED 2 -#define KVM_HOST_DATA_FLAG_HOST_SME_ENABLED 3 #define KVM_HOST_DATA_FLAG_TRBE_ENABLED 4 #define KVM_HOST_DATA_FLAG_EL1_TRACING_CONFIGURED 5 unsigned long flags; @@ -624,23 +622,13 @@ struct kvm_host_data { struct kvm_cpu_context host_ctxt; /* - * All pointers in this union are hyp VA. + * Hyp VA. * sve_state is only used in pKVM and if system_supports_sve(). */ - union { - struct user_fpsimd_state *fpsimd_state; - struct cpu_sve_state *sve_state; - }; - - union { - /* HYP VA pointer to the host storage for FPMR */ - u64 *fpmr_ptr; - /* - * Used by pKVM only, as it needs to provide storage - * for the host - */ - u64 fpmr; - }; + struct cpu_sve_state *sve_state; + + /* Used by pKVM only. */ + u64 fpmr; /* Ownership of the FP regs */ enum { @@ -1271,7 +1259,7 @@ int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, extern unsigned int __ro_after_init kvm_arm_vmid_bits; int __init kvm_arm_vmid_alloc_init(void); void __init kvm_arm_vmid_alloc_free(void); -bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid); void kvm_arm_vmid_clear_active(void); static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index d9c9218fa1fd..309942b06c5b 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -101,16 +101,18 @@ int populate_cache_leaves(unsigned int cpu) unsigned int level, idx; enum cache_type type; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct cacheinfo *infos = this_cpu_ci->info_list; for (idx = 0, level = 1; level <= this_cpu_ci->num_levels && - idx < this_cpu_ci->num_leaves; idx++, level++) { + idx < this_cpu_ci->num_leaves; level++) { type = get_cache_type(level); if (type == CACHE_TYPE_SEPARATE) { - ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); - ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); + if (idx + 1 >= this_cpu_ci->num_leaves) + break; + ci_leaf_init(&infos[idx++], CACHE_TYPE_DATA, level); + ci_leaf_init(&infos[idx++], CACHE_TYPE_INST, level); } else { - ci_leaf_init(this_leaf++, type, level); + ci_leaf_init(&infos[idx++], type, level); } } return 0; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 4eb7c6698ae4..d561cf3b8ac7 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3091,6 +3091,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM, CAP_HWCAP, KERNEL_HWCAP_FLAGM), HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), HWCAP_CAP(ID_AA64ISAR0_EL1, RNDR, IMP, CAP_HWCAP, KERNEL_HWCAP_RNG), + HWCAP_CAP(ID_AA64ISAR3_EL1, FPRCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_FPRCVT), HWCAP_CAP(ID_AA64PFR0_EL1, FP, IMP, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(ID_AA64PFR0_EL1, FP, FP16, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD), @@ -3180,8 +3181,6 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8MM8), - HWCAP_CAP(ID_AA64SMFR0_EL1, SF8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8MM4), HWCAP_CAP(ID_AA64SMFR0_EL1, SBitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SBITPERM), HWCAP_CAP(ID_AA64SMFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_AES), HWCAP_CAP(ID_AA64SMFR0_EL1, SFEXPA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SFEXPA), @@ -3192,6 +3191,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4), HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM8, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM8), + HWCAP_CAP(ID_AA64FPFR0_EL1, F8MM4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8MM4), HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3), HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2), #ifdef CONFIG_ARM64_POE diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 2b601d88762d..8370d55f0353 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1695,31 +1695,6 @@ void fpsimd_signal_preserve_current_state(void) } /* - * Called by KVM when entering the guest. - */ -void fpsimd_kvm_prepare(void) -{ - if (!system_supports_sve()) - return; - - /* - * KVM does not save host SVE state since we can only enter - * the guest from a syscall so the ABI means that only the - * non-saved SVE state needs to be saved. If we have left - * SVE enabled for performance reasons then update the task - * state to be FPSIMD only. - */ - get_cpu_fpsimd_context(); - - if (test_and_clear_thread_flag(TIF_SVE)) { - sve_to_fpsimd(current); - current->thread.fp_type = FP_STATE_FPSIMD; - } - - put_cpu_fpsimd_context(); -} - -/* * Associate current's FPSIMD context with this cpu * The caller must have ownership of the cpu FPSIMD context before calling * this function. diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 1a2c72f3e7f8..cb180684d10d 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -194,12 +194,19 @@ static void amu_fie_setup(const struct cpumask *cpus) int cpu; /* We are already set since the last insmod of cpufreq driver */ - if (unlikely(cpumask_subset(cpus, amu_fie_cpus))) + if (cpumask_available(amu_fie_cpus) && + unlikely(cpumask_subset(cpus, amu_fie_cpus))) return; - for_each_cpu(cpu, cpus) { + for_each_cpu(cpu, cpus) if (!freq_counters_valid(cpu)) return; + + if (!cpumask_available(amu_fie_cpus) && + !zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) { + WARN_ONCE(1, "Failed to allocate FIE cpumask for CPUs[%*pbl]\n", + cpumask_pr_args(cpus)); + return; } cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus); @@ -237,17 +244,8 @@ static struct notifier_block init_amu_fie_notifier = { static int __init init_amu_fie(void) { - int ret; - - if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) - return -ENOMEM; - - ret = cpufreq_register_notifier(&init_amu_fie_notifier, + return cpufreq_register_notifier(&init_amu_fie_notifier, CPUFREQ_POLICY_NOTIFIER); - if (ret) - free_cpumask_var(amu_fie_cpus); - - return ret; } core_initcall(init_amu_fie); diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 4ec32e86a8da..47ad6944f9f0 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -41,6 +41,7 @@ SECTIONS */ /DISCARD/ : { *(.note.GNU-stack .note.gnu.property) + *(.ARM.attributes) } .note : { *(.note.*) } :text :note diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f84c71f04d9e..e73326bd3ff7 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -162,6 +162,7 @@ SECTIONS /DISCARD/ : { *(.interp .dynamic) *(.dynsym .dynstr .hash .gnu.hash) + *(.ARM.attributes) } . = KIMAGE_VADDR; diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index d3d243366536..70802e4c91cf 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -447,21 +447,19 @@ static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level) static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, struct arch_timer_context *timer_ctx) { - int ret; - kvm_timer_update_status(timer_ctx, new_level); timer_ctx->irq.level = new_level; trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx), timer_ctx->irq.level); - if (!userspace_irqchip(vcpu->kvm)) { - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu, - timer_irq(timer_ctx), - timer_ctx->irq.level, - timer_ctx); - WARN_ON(ret); - } + if (userspace_irqchip(vcpu->kvm)) + return; + + kvm_vgic_inject_irq(vcpu->kvm, vcpu, + timer_irq(timer_ctx), + timer_ctx->irq.level, + timer_ctx); } /* Only called for a fully emulated timer */ @@ -471,10 +469,8 @@ static void timer_emulate(struct arch_timer_context *ctx) trace_kvm_timer_emulate(ctx, should_fire); - if (should_fire != ctx->irq.level) { + if (should_fire != ctx->irq.level) kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); - return; - } kvm_timer_update_status(ctx, should_fire); @@ -761,21 +757,6 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu, timer_irq(map->direct_ptimer), &arch_timer_irq_ops); WARN_ON_ONCE(ret); - - /* - * The virtual offset behaviour is "interesting", as it - * always applies when HCR_EL2.E2H==0, but only when - * accessed from EL1 when HCR_EL2.E2H==1. So make sure we - * track E2H when putting the HV timer in "direct" mode. - */ - if (map->direct_vtimer == vcpu_hvtimer(vcpu)) { - struct arch_timer_offset *offs = &map->direct_vtimer->offset; - - if (vcpu_el2_e2h_is_set(vcpu)) - offs->vcpu_offset = NULL; - else - offs->vcpu_offset = &__vcpu_sys_reg(vcpu, CNTVOFF_EL2); - } } } @@ -976,31 +957,21 @@ void kvm_timer_sync_nested(struct kvm_vcpu *vcpu) * which allows trapping of the timer registers even with NV2. * Still, this is still worse than FEAT_NV on its own. Meh. */ - if (!vcpu_el2_e2h_is_set(vcpu)) { - if (cpus_have_final_cap(ARM64_HAS_ECV)) - return; - - /* - * A non-VHE guest hypervisor doesn't have any direct access - * to its timers: the EL2 registers trap (and the HW is - * fully emulated), while the EL0 registers access memory - * despite the access being notionally direct. Boo. - * - * We update the hardware timer registers with the - * latest value written by the guest to the VNCR page - * and let the hardware take care of the rest. - */ - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL); - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL); - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL); - write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL); - } else { + if (!cpus_have_final_cap(ARM64_HAS_ECV)) { /* * For a VHE guest hypervisor, the EL2 state is directly - * stored in the host EL1 timers, while the emulated EL0 + * stored in the host EL1 timers, while the emulated EL1 * state is stored in the VNCR page. The latter could have * been updated behind our back, and we must reset the * emulation of the timers. + * + * A non-VHE guest hypervisor doesn't have any direct access + * to its timers: the EL2 registers trap despite being + * notionally direct (we use the EL1 HW, as for VHE), while + * the EL1 registers access memory. + * + * In both cases, process the emulated timers on each guest + * exit. Boo. */ struct timer_map map; get_timer_map(vcpu, &map); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 646e806c6ca6..0160b4924351 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -560,6 +560,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) last_ran = this_cpu_ptr(mmu->last_vcpu_ran); /* + * Ensure a VMID is allocated for the MMU before programming VTTBR_EL2, + * which happens eagerly in VHE. + * + * Also, the VMID allocator only preserves VMIDs that are active at the + * time of rollover, so KVM might need to grab a new VMID for the MMU if + * this is called from kvm_sched_in(). + */ + kvm_arm_vmid_update(&mmu->vmid); + + /* * We guarantee that both TLBs and I-cache are private to each * vcpu. If detecting that a vcpu from the same VM has * previously run on the same physical CPU, call into the @@ -1138,18 +1148,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ preempt_disable(); - /* - * The VMID allocator only tracks active VMIDs per - * physical CPU, and therefore the VMID allocated may not be - * preserved on VMID roll-over if the task was preempted, - * making a thread's VMID inactive. So we need to call - * kvm_arm_vmid_update() in non-premptible context. - */ - if (kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid) && - has_vhe()) - __load_stage2(vcpu->arch.hw_mmu, - vcpu->arch.hw_mmu->arch); - kvm_pmu_flush_hwstate(vcpu); local_irq_disable(); @@ -1980,7 +1978,7 @@ static int kvm_init_vector_slots(void) static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); - unsigned long tcr, ips; + unsigned long tcr; /* * Calculate the raw per-cpu offset without a translation from the @@ -1994,19 +1992,18 @@ static void __init cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) params->mair_el2 = read_sysreg(mair_el1); tcr = read_sysreg(tcr_el1); - ips = FIELD_GET(TCR_IPS_MASK, tcr); if (cpus_have_final_cap(ARM64_KVM_HVHE)) { + tcr &= ~(TCR_HD | TCR_HA | TCR_A1 | TCR_T0SZ_MASK); tcr |= TCR_EPD1_MASK; } else { + unsigned long ips = FIELD_GET(TCR_IPS_MASK, tcr); + tcr &= TCR_EL2_MASK; - tcr |= TCR_EL2_RES1; + tcr |= TCR_EL2_RES1 | FIELD_PREP(TCR_EL2_PS_MASK, ips); + if (lpa2_is_enabled()) + tcr |= TCR_EL2_DS; } - tcr &= ~TCR_T0SZ_MASK; tcr |= TCR_T0SZ(hyp_va_bits); - tcr &= ~TCR_EL2_PS_MASK; - tcr |= FIELD_PREP(TCR_EL2_PS_MASK, ips); - if (lpa2_is_enabled()) - tcr |= TCR_EL2_DS; params->tcr_el2 = tcr; params->pgd_pa = kvm_mmu_get_httbr(); @@ -2290,6 +2287,19 @@ static int __init init_subsystems(void) break; case -ENODEV: case -ENXIO: + /* + * No VGIC? No pKVM for you. + * + * Protected mode assumes that VGICv3 is present, so no point + * in trying to hobble along if vgic initialization fails. + */ + if (is_protected_kvm_enabled()) + goto out; + + /* + * Otherwise, userspace could choose to implement a GIC for its + * guest on non-cooperative hardware. + */ vgic_present = false; err = 0; break; @@ -2400,6 +2410,13 @@ static void kvm_hyp_init_symbols(void) kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1); kvm_nvhe_sym(__icache_flags) = __icache_flags; kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits; + + /* + * Flush entire BSS since part of its data containing init symbols is read + * while the MMU is off. + */ + kvm_flush_dcache_to_poc(kvm_ksym_ref(__hyp_bss_start), + kvm_ksym_ref(__hyp_bss_end) - kvm_ksym_ref(__hyp_bss_start)); } static int __init kvm_hyp_init_protection(u32 hyp_va_bits) @@ -2461,14 +2478,6 @@ static void finalize_init_hyp_mode(void) per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = kern_hyp_va(sve_state); } - } else { - for_each_possible_cpu(cpu) { - struct user_fpsimd_state *fpsimd_state; - - fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs; - per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state = - kern_hyp_va(fpsimd_state); - } } } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 4d3d1a2eb157..7f6e43d25691 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -54,50 +54,18 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) if (!system_supports_fpsimd()) return; - fpsimd_kvm_prepare(); - /* - * We will check TIF_FOREIGN_FPSTATE just before entering the - * guest in kvm_arch_vcpu_ctxflush_fp() and override this to - * FP_STATE_FREE if the flag set. + * Ensure that any host FPSIMD/SVE/SME state is saved and unbound such + * that the host kernel is responsible for restoring this state upon + * return to userspace, and the hyp code doesn't need to save anything. + * + * When the host may use SME, fpsimd_save_and_flush_cpu_state() ensures + * that PSTATE.{SM,ZA} == {0,0}. */ - *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; - *host_data_ptr(fpsimd_state) = kern_hyp_va(¤t->thread.uw.fpsimd_state); - *host_data_ptr(fpmr_ptr) = kern_hyp_va(¤t->thread.uw.fpmr); - - host_data_clear_flag(HOST_SVE_ENABLED); - if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) - host_data_set_flag(HOST_SVE_ENABLED); - - if (system_supports_sme()) { - host_data_clear_flag(HOST_SME_ENABLED); - if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) - host_data_set_flag(HOST_SME_ENABLED); - - /* - * If PSTATE.SM is enabled then save any pending FP - * state and disable PSTATE.SM. If we leave PSTATE.SM - * enabled and the guest does not enable SME via - * CPACR_EL1.SMEN then operations that should be valid - * may generate SME traps from EL1 to EL1 which we - * can't intercept and which would confuse the guest. - * - * Do the same for PSTATE.ZA in the case where there - * is state in the registers which has not already - * been saved, this is very unlikely to happen. - */ - if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) { - *host_data_ptr(fp_owner) = FP_STATE_FREE; - fpsimd_save_and_flush_cpu_state(); - } - } + fpsimd_save_and_flush_cpu_state(); + *host_data_ptr(fp_owner) = FP_STATE_FREE; - /* - * If normal guests gain SME support, maintain this behavior for pKVM - * guests, which don't support SME. - */ - WARN_ON(is_protected_kvm_enabled() && system_supports_sme() && - read_sysreg_s(SYS_SVCR)); + WARN_ON_ONCE(system_supports_sme() && read_sysreg_s(SYS_SVCR)); } /* @@ -162,52 +130,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); - /* - * If we have VHE then the Hyp code will reset CPACR_EL1 to - * the default value and we need to reenable SME. - */ - if (has_vhe() && system_supports_sme()) { - /* Also restore EL0 state seen on entry */ - if (host_data_test_flag(HOST_SME_ENABLED)) - sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_SMEN); - else - sysreg_clear_set(CPACR_EL1, - CPACR_EL1_SMEN_EL0EN, - CPACR_EL1_SMEN_EL1EN); - isb(); - } - if (guest_owns_fp_regs()) { - if (vcpu_has_sve(vcpu)) { - u64 zcr = read_sysreg_el1(SYS_ZCR); - - /* - * If the vCPU is in the hyp context then ZCR_EL1 is - * loaded with its vEL2 counterpart. - */ - __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr; - - /* - * Restore the VL that was saved when bound to the CPU, - * which is the maximum VL for the guest. Because the - * layout of the data when saving the sve state depends - * on the VL, we need to use a consistent (i.e., the - * maximum) VL. - * Note that this means that at guest exit ZCR_EL1 is - * not necessarily the same as on guest entry. - * - * ZCR_EL2 holds the guest hypervisor's VL when running - * a nested guest, which could be smaller than the - * max for the vCPU. Similar to above, we first need to - * switch to a VL consistent with the layout of the - * vCPU's SVE state. KVM support for NV implies VHE, so - * using the ZCR_EL1 alias is safe. - */ - if (!has_vhe() || (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) - sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, - SYS_ZCR_EL1); - } - /* * Flush (save and invalidate) the fpsimd/sve state so that if * the host tries to use fpsimd/sve, it's not using stale data @@ -219,18 +142,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) * when needed. */ fpsimd_save_and_flush_cpu_state(); - } else if (has_vhe() && system_supports_sve()) { - /* - * The FPSIMD/SVE state in the CPU has not been touched, and we - * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been - * reset by kvm_reset_cptr_el2() in the Hyp code, disabling SVE - * for EL0. To avoid spurious traps, restore the trap state - * seen by kvm_arch_vcpu_load_fp(): - */ - if (host_data_test_flag(HOST_SVE_ENABLED)) - sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN); - else - sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0); } local_irq_restore(flags); diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 4433a234aa9b..9f4e8d68ab50 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -44,6 +44,11 @@ alternative_if ARM64_HAS_RAS_EXTN alternative_else_nop_endif mrs x1, isr_el1 cbz x1, 1f + + // Ensure that __guest_enter() always provides a context + // synchronization event so that callers don't need ISBs for anything + // that would usually be synchonized by the ERET. + isb mov x0, #ARM_EXCEPTION_IRQ ret diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f838a45665f2..23bbe28eaaf9 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -326,7 +326,7 @@ static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault); } -static bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_mops(struct kvm_vcpu *vcpu, u64 *exit_code) { *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); arm64_mops_reset_regs(vcpu_gp_regs(vcpu), vcpu->arch.fault.esr_el2); @@ -375,7 +375,87 @@ static inline void __hyp_sve_save_host(void) true); } -static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu); +static inline void fpsimd_lazy_switch_to_guest(struct kvm_vcpu *vcpu) +{ + u64 zcr_el1, zcr_el2; + + if (!guest_owns_fp_regs()) + return; + + if (vcpu_has_sve(vcpu)) { + /* A guest hypervisor may restrict the effective max VL. */ + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) + zcr_el2 = __vcpu_sys_reg(vcpu, ZCR_EL2); + else + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + + write_sysreg_el2(zcr_el2, SYS_ZCR); + + zcr_el1 = __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)); + write_sysreg_el1(zcr_el1, SYS_ZCR); + } +} + +static inline void fpsimd_lazy_switch_to_host(struct kvm_vcpu *vcpu) +{ + u64 zcr_el1, zcr_el2; + + if (!guest_owns_fp_regs()) + return; + + /* + * When the guest owns the FP regs, we know that guest+hyp traps for + * any FPSIMD/SVE/SME features exposed to the guest have been disabled + * by either fpsimd_lazy_switch_to_guest() or kvm_hyp_handle_fpsimd() + * prior to __guest_entry(). As __guest_entry() guarantees a context + * synchronization event, we don't need an ISB here to avoid taking + * traps for anything that was exposed to the guest. + */ + if (vcpu_has_sve(vcpu)) { + zcr_el1 = read_sysreg_el1(SYS_ZCR); + __vcpu_sys_reg(vcpu, vcpu_sve_zcr_elx(vcpu)) = zcr_el1; + + /* + * The guest's state is always saved using the guest's max VL. + * Ensure that the host has the guest's max VL active such that + * the host can save the guest's state lazily, but don't + * artificially restrict the host to the guest's max VL. + */ + if (has_vhe()) { + zcr_el2 = vcpu_sve_max_vq(vcpu) - 1; + write_sysreg_el2(zcr_el2, SYS_ZCR); + } else { + zcr_el2 = sve_vq_from_vl(kvm_host_sve_max_vl) - 1; + write_sysreg_el2(zcr_el2, SYS_ZCR); + + zcr_el1 = vcpu_sve_max_vq(vcpu) - 1; + write_sysreg_el1(zcr_el1, SYS_ZCR); + } + } +} + +static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) +{ + /* + * Non-protected kvm relies on the host restoring its sve state. + * Protected kvm restores the host's sve state as not to reveal that + * fpsimd was used by a guest nor leak upper sve bits. + */ + if (system_supports_sve()) { + __hyp_sve_save_host(); + + /* Re-enable SVE traps if not supported for the guest vcpu. */ + if (!vcpu_has_sve(vcpu)) + cpacr_clear_set(CPACR_EL1_ZEN, 0); + + } else { + __fpsimd_save_state(host_data_ptr(host_ctxt.fp_regs)); + } + + if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) + *host_data_ptr(fpmr) = read_sysreg_s(SYS_FPMR); +} + /* * We trap the first access to the FP/SIMD to save the host context and @@ -383,7 +463,7 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu); * If FP/SIMD is not implemented, handle the trap and inject an undefined * instruction exception to the guest. Similarly for trapped SVE accesses. */ -static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) { bool sve_guest; u8 esr_ec; @@ -425,7 +505,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) isb(); /* Write out the host state if it's in the registers */ - if (host_owns_fp_regs()) + if (is_protected_kvm_enabled() && host_owns_fp_regs()) kvm_hyp_save_fpsimd_host(vcpu); /* Restore the guest state */ @@ -501,9 +581,22 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) return true; } +/* Open-coded version of timer_get_offset() to allow for kern_hyp_va() */ +static inline u64 hyp_timer_get_offset(struct arch_timer_context *ctxt) +{ + u64 offset = 0; + + if (ctxt->offset.vm_offset) + offset += *kern_hyp_va(ctxt->offset.vm_offset); + if (ctxt->offset.vcpu_offset) + offset += *kern_hyp_va(ctxt->offset.vcpu_offset); + + return offset; +} + static inline u64 compute_counter_value(struct arch_timer_context *ctxt) { - return arch_timer_read_cntpct_el0() - timer_get_offset(ctxt); + return arch_timer_read_cntpct_el0() - hyp_timer_get_offset(ctxt); } static bool kvm_handle_cntxct(struct kvm_vcpu *vcpu) @@ -587,7 +680,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu) return true; } -static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) { if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && handle_tx2_tvm(vcpu)) @@ -607,7 +700,7 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } -static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) { if (static_branch_unlikely(&vgic_v3_cpuif_trap) && __vgic_v3_perform_cpuif_access(vcpu) == 1) @@ -616,19 +709,18 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } -static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, + u64 *exit_code) { if (!__populate_fault_info(vcpu)) return true; return false; } -static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) - __alias(kvm_hyp_handle_memory_fault); -static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code) - __alias(kvm_hyp_handle_memory_fault); +#define kvm_hyp_handle_iabt_low kvm_hyp_handle_memory_fault +#define kvm_hyp_handle_watchpt_low kvm_hyp_handle_memory_fault -static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) { if (kvm_hyp_handle_memory_fault(vcpu, exit_code)) return true; @@ -658,23 +750,16 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); - -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code); - /* * Allow the hypervisor to handle the exit with an exit handler if it has one. * * Returns true if the hypervisor handled the exit, and control should go back * to the guest, or false if it hasn't. */ -static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code, + const exit_handler_fn *handlers) { - const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); - exit_handler_fn fn; - - fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; - + exit_handler_fn fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; if (fn) return fn(vcpu, exit_code); @@ -704,20 +789,9 @@ static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code * the guest, false when we should restore the host state and return to the * main run loop. */ -static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool __fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code, + const exit_handler_fn *handlers) { - /* - * Save PSTATE early so that we can evaluate the vcpu mode - * early on. - */ - synchronize_vcpu_pstate(vcpu, exit_code); - - /* - * Check whether we want to repaint the state one way or - * another. - */ - early_exit_filter(vcpu, exit_code); - if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); @@ -747,7 +821,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) goto exit; /* Check if there's an exit handler and allow it to handle the exit. */ - if (kvm_hyp_handle_exit(vcpu, exit_code)) + if (kvm_hyp_handle_exit(vcpu, exit_code, handlers)) goto guest; exit: /* Return to the host kernel and handle the exit */ diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 5c134520e180..2c37680d954c 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -5,6 +5,7 @@ */ #include <hyp/adjust_pc.h> +#include <hyp/switch.h> #include <asm/pgtable-types.h> #include <asm/kvm_asm.h> @@ -83,7 +84,7 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) if (system_supports_sve()) __hyp_sve_restore_host(); else - __fpsimd_restore_state(*host_data_ptr(fpsimd_state)); + __fpsimd_restore_state(host_data_ptr(host_ctxt.fp_regs)); if (has_fpmr) write_sysreg_s(*host_data_ptr(fpmr), SYS_FPMR); @@ -91,11 +92,34 @@ static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; } +static void flush_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + hyp_vcpu->vcpu.arch.debug_owner = host_vcpu->arch.debug_owner; + + if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu)) + hyp_vcpu->vcpu.arch.vcpu_debug_state = host_vcpu->arch.vcpu_debug_state; + else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu)) + hyp_vcpu->vcpu.arch.external_debug_state = host_vcpu->arch.external_debug_state; +} + +static void sync_debug_state(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + if (kvm_guest_owns_debug_regs(&hyp_vcpu->vcpu)) + host_vcpu->arch.vcpu_debug_state = hyp_vcpu->vcpu.arch.vcpu_debug_state; + else if (kvm_host_owns_debug_regs(&hyp_vcpu->vcpu)) + host_vcpu->arch.external_debug_state = hyp_vcpu->vcpu.arch.external_debug_state; +} + static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) { struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; fpsimd_sve_flush(); + flush_debug_state(hyp_vcpu); hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt; @@ -123,6 +147,7 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) unsigned int i; fpsimd_sve_sync(&hyp_vcpu->vcpu); + sync_debug_state(hyp_vcpu); host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt; @@ -200,8 +225,12 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) sync_hyp_vcpu(hyp_vcpu); } else { + struct kvm_vcpu *vcpu = kern_hyp_va(host_vcpu); + /* The host is fully trusted, run its vCPU directly. */ - ret = __kvm_vcpu_run(kern_hyp_va(host_vcpu)); + fpsimd_lazy_switch_to_guest(vcpu); + ret = __kvm_vcpu_run(vcpu); + fpsimd_lazy_switch_to_host(vcpu); } out: cpu_reg(host_ctxt, 1) = ret; @@ -651,12 +680,6 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_SMC64: handle_host_smc(host_ctxt); break; - case ESR_ELx_EC_SVE: - cpacr_clear_set(0, CPACR_EL1_ZEN); - isb(); - sve_cond_update_zcr_vq(sve_vq_from_vl(kvm_host_sve_max_vl) - 1, - SYS_ZCR_EL2); - break; case ESR_ELx_EC_IABT_LOW: case ESR_ELx_EC_DABT_LOW: handle_host_mem_abort(host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 7ad7b133b81a..19c3c631708c 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -943,10 +943,10 @@ static int __check_host_shared_guest(struct pkvm_hyp_vm *vm, u64 *__phys, u64 ip ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level); if (ret) return ret; - if (level != KVM_PGTABLE_LAST_LEVEL) - return -E2BIG; if (!kvm_pte_valid(pte)) return -ENOENT; + if (level != KVM_PGTABLE_LAST_LEVEL) + return -E2BIG; state = guest_get_page_state(pte, ipa); if (state != PKVM_PAGE_SHARED_BORROWED) @@ -998,63 +998,73 @@ unlock: return ret; } -int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) +static void assert_host_shared_guest(struct pkvm_hyp_vm *vm, u64 ipa) { - struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); - u64 ipa = hyp_pfn_to_phys(gfn); u64 phys; int ret; - if (prot & ~KVM_PGTABLE_PROT_RWX) - return -EINVAL; + if (!IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) + return; host_lock_component(); guest_lock_component(vm); ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); guest_unlock_component(vm); host_unlock_component(); - return ret; + WARN_ON(ret && ret != -ENOENT); } -int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm) +int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot) { + struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); u64 ipa = hyp_pfn_to_phys(gfn); - u64 phys; int ret; - host_lock_component(); - guest_lock_component(vm); + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; - ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE); + if (prot & ~KVM_PGTABLE_PROT_RWX) + return -EINVAL; + assert_host_shared_guest(vm, ipa); + guest_lock_component(vm); + ret = kvm_pgtable_stage2_relax_perms(&vm->pgt, ipa, prot, 0); guest_unlock_component(vm); - host_unlock_component(); return ret; } -int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm) +int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *vm) { u64 ipa = hyp_pfn_to_phys(gfn); - u64 phys; int ret; - host_lock_component(); + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; + + assert_host_shared_guest(vm, ipa); guest_lock_component(vm); + ret = kvm_pgtable_stage2_wrprotect(&vm->pgt, ipa, PAGE_SIZE); + guest_unlock_component(vm); - ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold); + return ret; +} + +int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm) +{ + u64 ipa = hyp_pfn_to_phys(gfn); + int ret; + + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; + assert_host_shared_guest(vm, ipa); + guest_lock_component(vm); + ret = kvm_pgtable_stage2_test_clear_young(&vm->pgt, ipa, PAGE_SIZE, mkold); guest_unlock_component(vm); - host_unlock_component(); return ret; } @@ -1063,18 +1073,14 @@ int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu) { struct pkvm_hyp_vm *vm = pkvm_hyp_vcpu_to_hyp_vm(vcpu); u64 ipa = hyp_pfn_to_phys(gfn); - u64 phys; - int ret; - - host_lock_component(); - guest_lock_component(vm); - ret = __check_host_shared_guest(vm, &phys, ipa); - if (!ret) - kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); + if (pkvm_hyp_vm_is_protected(vm)) + return -EPERM; + assert_host_shared_guest(vm, ipa); + guest_lock_component(vm); + kvm_pgtable_stage2_mkyoung(&vm->pgt, ipa, 0); guest_unlock_component(vm); - host_unlock_component(); - return ret; + return 0; } diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6c846d033d24..7d2ba6ef0261 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -39,6 +39,9 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) { u64 val = CPTR_EL2_TAM; /* Same bit irrespective of E2H */ + if (!guest_owns_fp_regs()) + __activate_traps_fpsimd32(vcpu); + if (has_hvhe()) { val |= CPACR_EL1_TTA; @@ -47,6 +50,8 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) if (vcpu_has_sve(vcpu)) val |= CPACR_EL1_ZEN; } + + write_sysreg(val, cpacr_el1); } else { val |= CPTR_EL2_TTA | CPTR_NVHE_EL2_RES1; @@ -61,12 +66,32 @@ static void __activate_cptr_traps(struct kvm_vcpu *vcpu) if (!guest_owns_fp_regs()) val |= CPTR_EL2_TFP; + + write_sysreg(val, cptr_el2); } +} - if (!guest_owns_fp_regs()) - __activate_traps_fpsimd32(vcpu); +static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + if (has_hvhe()) { + u64 val = CPACR_EL1_FPEN; + + if (cpus_have_final_cap(ARM64_SVE)) + val |= CPACR_EL1_ZEN; + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN; - kvm_write_cptr_el2(val); + write_sysreg(val, cpacr_el1); + } else { + u64 val = CPTR_NVHE_EL2_RES1; + + if (!cpus_have_final_cap(ARM64_SVE)) + val |= CPTR_EL2_TZ; + if (!cpus_have_final_cap(ARM64_SME)) + val |= CPTR_EL2_TSM; + + write_sysreg(val, cptr_el2); + } } static void __activate_traps(struct kvm_vcpu *vcpu) @@ -119,7 +144,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) write_sysreg(this_cpu_ptr(&kvm_init_params)->hcr_el2, hcr_el2); - kvm_reset_cptr_el2(vcpu); + __deactivate_cptr_traps(vcpu); write_sysreg(__kvm_hyp_host_vector, vbar_el2); } @@ -192,34 +217,6 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) kvm_handle_pvm_sysreg(vcpu, exit_code)); } -static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) -{ - /* - * Non-protected kvm relies on the host restoring its sve state. - * Protected kvm restores the host's sve state as not to reveal that - * fpsimd was used by a guest nor leak upper sve bits. - */ - if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) { - __hyp_sve_save_host(); - - /* Re-enable SVE traps if not supported for the guest vcpu. */ - if (!vcpu_has_sve(vcpu)) - cpacr_clear_set(CPACR_EL1_ZEN, 0); - - } else { - __fpsimd_save_state(*host_data_ptr(fpsimd_state)); - } - - if (kvm_has_fpmr(kern_hyp_va(vcpu->kvm))) { - u64 val = read_sysreg_s(SYS_FPMR); - - if (unlikely(is_protected_kvm_enabled())) - *host_data_ptr(fpmr) = val; - else - **host_data_ptr(fpmr_ptr) = val; - } -} - static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, @@ -251,19 +248,21 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) return hyp_exit_handlers; } -/* - * Some guests (e.g., protected VMs) are not be allowed to run in AArch32. - * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a - * guest from dropping to AArch32 EL0 if implemented by the CPU. If the - * hypervisor spots a guest in such a state ensure it is handled, and don't - * trust the host to spot or fix it. The check below is based on the one in - * kvm_arch_vcpu_ioctl_run(). - * - * Returns false if the guest ran in AArch32 when it shouldn't have, and - * thus should exit to the host, or true if a the guest run loop can continue. - */ -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { + const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); + + synchronize_vcpu_pstate(vcpu, exit_code); + + /* + * Some guests (e.g., protected VMs) are not be allowed to run in + * AArch32. The ARMv8 architecture does not give the hypervisor a + * mechanism to prevent a guest from dropping to AArch32 EL0 if + * implemented by the CPU. If the hypervisor spots a guest in such a + * state ensure it is handled, and don't trust the host to spot or fix + * it. The check below is based on the one in + * kvm_arch_vcpu_ioctl_run(). + */ if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) { /* * As we have caught the guest red-handed, decide that it isn't @@ -276,6 +275,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT); *exit_code |= ARM_EXCEPTION_IL; } + + return __fixup_guest_exit(vcpu, exit_code, handlers); } /* Switch to the guest for legacy non-VHE systems */ diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index b5b9dbaf1fdd..647737d6e8d0 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -136,6 +136,16 @@ write: write_sysreg(val, cpacr_el1); } +static void __deactivate_cptr_traps(struct kvm_vcpu *vcpu) +{ + u64 val = CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN; + + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN_EL1EN; + + write_sysreg(val, cpacr_el1); +} + static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; @@ -207,7 +217,7 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) */ asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); - kvm_reset_cptr_el2(vcpu); + __deactivate_cptr_traps(vcpu); if (!arm64_kernel_unmapped_at_el0()) host_vectors = __this_cpu_read(this_cpu_vector); @@ -413,14 +423,6 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code) return true; } -static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) -{ - __fpsimd_save_state(*host_data_ptr(fpsimd_state)); - - if (kvm_has_fpmr(vcpu->kvm)) - **host_data_ptr(fpmr_ptr) = read_sysreg_s(SYS_FPMR); -} - static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code) { int ret = -EINVAL; @@ -538,13 +540,10 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { - return hyp_exit_handlers; -} + synchronize_vcpu_pstate(vcpu, exit_code); -static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) -{ /* * If we were in HYP context on entry, adjust the PSTATE view * so that the usual helpers work correctly. @@ -564,6 +563,8 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) *vcpu_cpsr(vcpu) &= ~(PSR_MODE_MASK | PSR_MODE32_BIT); *vcpu_cpsr(vcpu) |= mode; } + + return __fixup_guest_exit(vcpu, exit_code, hyp_exit_handlers); } /* Switch to the guest for VHE systems running in EL2 */ @@ -578,6 +579,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_save_host_state_vhe(host_ctxt); + fpsimd_lazy_switch_to_guest(vcpu); + /* * Note that ARM erratum 1165522 requires us to configure both stage 1 * and stage 2 translation for the guest context before we clear @@ -602,6 +605,8 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) __deactivate_traps(vcpu); + fpsimd_lazy_switch_to_host(vcpu); + sysreg_restore_host_state_vhe(host_ctxt); if (guest_owns_fp_regs()) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index 33d2ace68665..0c9387d2f507 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -67,26 +67,27 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu) if (!tmp) return -ENOMEM; + swap(kvm->arch.nested_mmus, tmp); + /* * If we went through a realocation, adjust the MMU back-pointers in * the previously initialised kvm_pgtable structures. */ if (kvm->arch.nested_mmus != tmp) for (int i = 0; i < kvm->arch.nested_mmus_size; i++) - tmp[i].pgt->mmu = &tmp[i]; + kvm->arch.nested_mmus[i].pgt->mmu = &kvm->arch.nested_mmus[i]; for (int i = kvm->arch.nested_mmus_size; !ret && i < num_mmus; i++) - ret = init_nested_s2_mmu(kvm, &tmp[i]); + ret = init_nested_s2_mmu(kvm, &kvm->arch.nested_mmus[i]); if (ret) { for (int i = kvm->arch.nested_mmus_size; i < num_mmus; i++) - kvm_free_stage2_pgd(&tmp[i]); + kvm_free_stage2_pgd(&kvm->arch.nested_mmus[i]); return ret; } kvm->arch.nested_mmus_size = num_mmus; - kvm->arch.nested_mmus = tmp; return 0; } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index f6cd1ea7fb55..82430c1e1dd0 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1452,6 +1452,16 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +static bool access_hv_timer(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!vcpu_el2_e2h_is_set(vcpu)) + return undef_access(vcpu, p, r); + + return access_arch_timer(vcpu, p, r); +} + static s64 kvm_arm64_ftr_safe_value(u32 id, const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { @@ -3103,9 +3113,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { EL2_REG(CNTHP_CTL_EL2, access_arch_timer, reset_val, 0), EL2_REG(CNTHP_CVAL_EL2, access_arch_timer, reset_val, 0), - { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_arch_timer }, - EL2_REG(CNTHV_CTL_EL2, access_arch_timer, reset_val, 0), - EL2_REG(CNTHV_CVAL_EL2, access_arch_timer, reset_val, 0), + { SYS_DESC(SYS_CNTHV_TVAL_EL2), access_hv_timer }, + EL2_REG(CNTHV_CTL_EL2, access_hv_timer, reset_val, 0), + EL2_REG(CNTHV_CVAL_EL2, access_hv_timer, reset_val, 0), { SYS_DESC(SYS_CNTKCTL_EL12), access_cntkctl_el12 }, diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index bc7e22ab5d81..775461cf2d2d 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -34,9 +34,9 @@ * * CPU Interface: * - * - kvm_vgic_vcpu_init(): initialization of static data that - * doesn't depend on any sizing information or emulation type. No - * allocation is allowed there. + * - kvm_vgic_vcpu_init(): initialization of static data that doesn't depend + * on any sizing information. Private interrupts are allocated if not + * already allocated at vgic-creation time. */ /* EARLY INIT */ @@ -58,6 +58,8 @@ void kvm_vgic_early_init(struct kvm *kvm) /* CREATION */ +static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type); + /** * kvm_vgic_create: triggered by the instantiation of the VGIC device by * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only) @@ -112,6 +114,22 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) goto out_unlock; } + kvm_for_each_vcpu(i, vcpu, kvm) { + ret = vgic_allocate_private_irqs_locked(vcpu, type); + if (ret) + break; + } + + if (ret) { + kvm_for_each_vcpu(i, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + kfree(vgic_cpu->private_irqs); + vgic_cpu->private_irqs = NULL; + } + + goto out_unlock; + } + kvm->arch.vgic.in_kernel = true; kvm->arch.vgic.vgic_model = type; @@ -180,7 +198,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) return 0; } -static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu) +static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; int i; @@ -218,17 +236,28 @@ static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu) /* PPIs */ irq->config = VGIC_CONFIG_LEVEL; } + + switch (type) { + case KVM_DEV_TYPE_ARM_VGIC_V3: + irq->group = 1; + irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: + irq->group = 0; + irq->targets = BIT(vcpu->vcpu_id); + break; + } } return 0; } -static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu) +static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu, u32 type) { int ret; mutex_lock(&vcpu->kvm->arch.config_lock); - ret = vgic_allocate_private_irqs_locked(vcpu); + ret = vgic_allocate_private_irqs_locked(vcpu, type); mutex_unlock(&vcpu->kvm->arch.config_lock); return ret; @@ -258,7 +287,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) if (!irqchip_in_kernel(vcpu->kvm)) return 0; - ret = vgic_allocate_private_irqs(vcpu); + ret = vgic_allocate_private_irqs(vcpu, dist->vgic_model); if (ret) return ret; @@ -295,7 +324,7 @@ int vgic_init(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; struct kvm_vcpu *vcpu; - int ret = 0, i; + int ret = 0; unsigned long idx; lockdep_assert_held(&kvm->arch.config_lock); @@ -315,35 +344,6 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; - /* Initialize groups on CPUs created before the VGIC type was known */ - kvm_for_each_vcpu(idx, vcpu, kvm) { - ret = vgic_allocate_private_irqs_locked(vcpu); - if (ret) - goto out; - - for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); - - switch (dist->vgic_model) { - case KVM_DEV_TYPE_ARM_VGIC_V3: - irq->group = 1; - irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu); - break; - case KVM_DEV_TYPE_ARM_VGIC_V2: - irq->group = 0; - irq->targets = 1U << idx; - break; - default: - ret = -EINVAL; - } - - vgic_put_irq(kvm, irq); - - if (ret) - goto out; - } - } - /* * If we have GICv4.1 enabled, unconditionally request enable the * v4 support so that we get HW-accelerated vSGIs. Otherwise, only diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c index 806223b7022a..7fe8ba1a2851 100644 --- a/arch/arm64/kvm/vmid.c +++ b/arch/arm64/kvm/vmid.c @@ -135,11 +135,10 @@ void kvm_arm_vmid_clear_active(void) atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID); } -bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) +void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) { unsigned long flags; u64 vmid, old_active_vmid; - bool updated = false; vmid = atomic64_read(&kvm_vmid->id); @@ -157,21 +156,17 @@ bool kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid) if (old_active_vmid != 0 && vmid_gen_match(vmid) && 0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids), old_active_vmid, vmid)) - return false; + return; raw_spin_lock_irqsave(&cpu_vmid_lock, flags); /* Check that our VMID belongs to the current generation. */ vmid = atomic64_read(&kvm_vmid->id); - if (!vmid_gen_match(vmid)) { + if (!vmid_gen_match(vmid)) vmid = new_vmid(kvm_vmid); - updated = true; - } atomic64_set(this_cpu_ptr(&active_vmids), vmid); raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags); - - return updated; } /* diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 98a2a0e64e25..b3a7fafe8892 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -100,20 +100,11 @@ static int find_num_contig(struct mm_struct *mm, unsigned long addr, static inline int num_contig_ptes(unsigned long size, size_t *pgsize) { - int contig_ptes = 0; + int contig_ptes = 1; *pgsize = size; switch (size) { -#ifndef __PAGETABLE_PMD_FOLDED - case PUD_SIZE: - if (pud_sect_supported()) - contig_ptes = 1; - break; -#endif - case PMD_SIZE: - contig_ptes = 1; - break; case CONT_PMD_SIZE: *pgsize = PMD_SIZE; contig_ptes = CONT_PMDS; @@ -122,6 +113,8 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize) *pgsize = PAGE_SIZE; contig_ptes = CONT_PTES; break; + default: + WARN_ON(!__hugetlb_valid_size(size)); } return contig_ptes; @@ -163,24 +156,23 @@ static pte_t get_clear_contig(struct mm_struct *mm, unsigned long pgsize, unsigned long ncontig) { - pte_t orig_pte = __ptep_get(ptep); - unsigned long i; - - for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) { - pte_t pte = __ptep_get_and_clear(mm, addr, ptep); - - /* - * If HW_AFDBM is enabled, then the HW could turn on - * the dirty or accessed bit for any page in the set, - * so check them all. - */ - if (pte_dirty(pte)) - orig_pte = pte_mkdirty(orig_pte); - - if (pte_young(pte)) - orig_pte = pte_mkyoung(orig_pte); + pte_t pte, tmp_pte; + bool present; + + pte = __ptep_get_and_clear(mm, addr, ptep); + present = pte_present(pte); + while (--ncontig) { + ptep++; + addr += pgsize; + tmp_pte = __ptep_get_and_clear(mm, addr, ptep); + if (present) { + if (pte_dirty(tmp_pte)) + pte = pte_mkdirty(pte); + if (pte_young(tmp_pte)) + pte = pte_mkyoung(pte); + } } - return orig_pte; + return pte; } static pte_t get_clear_contig_flush(struct mm_struct *mm, @@ -396,18 +388,13 @@ void huge_pte_clear(struct mm_struct *mm, unsigned long addr, __pte_clear(mm, addr, ptep); } -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, unsigned long sz) { int ncontig; size_t pgsize; - pte_t orig_pte = __ptep_get(ptep); - - if (!pte_cont(orig_pte)) - return __ptep_get_and_clear(mm, addr, ptep); - - ncontig = find_num_contig(mm, addr, ptep, &pgsize); + ncontig = num_contig_ptes(sz, &pgsize); return get_clear_contig(mm, addr, ptep, pgsize, ncontig); } @@ -549,6 +536,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size) pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { + unsigned long psize = huge_page_size(hstate_vma(vma)); + if (alternative_has_cap_unlikely(ARM64_WORKAROUND_2645198)) { /* * Break-before-make (BBM) is required for all user space mappings @@ -558,7 +547,7 @@ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr if (pte_user_exec(__ptep_get(ptep))) return huge_ptep_clear_flush(vma, addr, ptep); } - return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); } void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9c0b8d9558fc..ccdef53872a0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -279,12 +279,7 @@ void __init arm64_memblock_init(void) if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern u16 memstart_offset_seed; - - /* - * Use the sanitised version of id_aa64mmfr0_el1 so that linear - * map randomization can be enabled by shrinking the IPA space. - */ - u64 mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); int parange = cpuid_feature_extract_unsigned_field( mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); s64 range = linear_region_size - diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 19c67ed1a21f..18543b603c77 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -162,6 +162,13 @@ static int copy_p4d(struct trans_pgd_info *info, pgd_t *dst_pgdp, unsigned long next; unsigned long addr = start; + if (pgd_none(READ_ONCE(*dst_pgdp))) { + dst_p4dp = trans_alloc(info); + if (!dst_p4dp) + return -ENOMEM; + pgd_populate(NULL, dst_pgdp, dst_p4dp); + } + dst_p4dp = p4d_offset(dst_pgdp, start); src_p4dp = p4d_offset(src_pgdp, start); do { diff --git a/arch/loongarch/include/asm/cpu-info.h b/arch/loongarch/include/asm/cpu-info.h index 35e0a230a484..7f5bc0ad9d50 100644 --- a/arch/loongarch/include/asm/cpu-info.h +++ b/arch/loongarch/include/asm/cpu-info.h @@ -76,27 +76,6 @@ extern const char *__cpu_full_name[]; #define cpu_family_string() __cpu_family[raw_smp_processor_id()] #define cpu_full_name_string() __cpu_full_name[raw_smp_processor_id()] -struct seq_file; -struct notifier_block; - -extern int register_proc_cpuinfo_notifier(struct notifier_block *nb); -extern int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v); - -#define proc_cpuinfo_notifier(fn, pri) \ -({ \ - static struct notifier_block fn##_nb = { \ - .notifier_call = fn, \ - .priority = pri \ - }; \ - \ - register_proc_cpuinfo_notifier(&fn##_nb); \ -}) - -struct proc_cpuinfo_notifier_args { - struct seq_file *m; - unsigned long n; -}; - static inline bool cpus_are_siblings(int cpua, int cpub) { struct cpuinfo_loongarch *infoa = &cpu_data[cpua]; diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index c8e4057734d0..4dc4b3e04225 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -36,7 +36,8 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + unsigned long sz) { pte_t clear; pte_t pte = ptep_get(ptep); @@ -51,8 +52,9 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; + unsigned long sz = huge_page_size(hstate_vma(vma)); - pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz); flush_tlb_page(vma, addr); return pte; } diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 3383c9d24e94..b87d1d5e5890 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -77,6 +77,8 @@ extern int __cpu_logical_map[NR_CPUS]; #define SMP_IRQ_WORK BIT(ACTION_IRQ_WORK) #define SMP_CLEAR_VECTOR BIT(ACTION_CLEAR_VECTOR) +struct seq_file; + struct secondary_data { unsigned long stack; unsigned long thread_info; diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S index 86d5d90ebefe..4f0912141781 100644 --- a/arch/loongarch/kernel/genex.S +++ b/arch/loongarch/kernel/genex.S @@ -18,16 +18,19 @@ .align 5 SYM_FUNC_START(__arch_cpu_idle) - /* start of rollback region */ - LONG_L t0, tp, TI_FLAGS - nop - andi t0, t0, _TIF_NEED_RESCHED - bnez t0, 1f - nop - nop - nop + /* start of idle interrupt region */ + ori t0, zero, CSR_CRMD_IE + /* idle instruction needs irq enabled */ + csrxchg t0, t0, LOONGARCH_CSR_CRMD + /* + * If an interrupt lands here; between enabling interrupts above and + * going idle on the next instruction, we must *NOT* go idle since the + * interrupt could have set TIF_NEED_RESCHED or caused an timer to need + * reprogramming. Fall through -- see handle_vint() below -- and have + * the idle loop take care of things. + */ idle 0 - /* end of rollback region */ + /* end of idle interrupt region */ 1: jr ra SYM_FUNC_END(__arch_cpu_idle) @@ -35,11 +38,10 @@ SYM_CODE_START(handle_vint) UNWIND_HINT_UNDEFINED BACKUP_T0T1 SAVE_ALL - la_abs t1, __arch_cpu_idle + la_abs t1, 1b LONG_L t0, sp, PT_ERA - /* 32 byte rollback region */ - ori t0, t0, 0x1f - xori t0, t0, 0x1f + /* 3 instructions idle interrupt region */ + ori t0, t0, 0b1100 bne t0, t1, 1f LONG_S t0, sp, PT_ERA 1: move a0, sp diff --git a/arch/loongarch/kernel/idle.c b/arch/loongarch/kernel/idle.c index 0b5dd2faeb90..54b247d8cdb6 100644 --- a/arch/loongarch/kernel/idle.c +++ b/arch/loongarch/kernel/idle.c @@ -11,7 +11,6 @@ void __cpuidle arch_cpu_idle(void) { - raw_local_irq_enable(); - __arch_cpu_idle(); /* idle instruction needs irq enabled */ + __arch_cpu_idle(); raw_local_irq_disable(); } diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index 6ce46d92f1f1..cea30768ae92 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -13,28 +13,12 @@ #include <asm/processor.h> #include <asm/time.h> -/* - * No lock; only written during early bootup by CPU 0. - */ -static RAW_NOTIFIER_HEAD(proc_cpuinfo_chain); - -int __ref register_proc_cpuinfo_notifier(struct notifier_block *nb) -{ - return raw_notifier_chain_register(&proc_cpuinfo_chain, nb); -} - -int proc_cpuinfo_notifier_call_chain(unsigned long val, void *v) -{ - return raw_notifier_call_chain(&proc_cpuinfo_chain, val, v); -} - static int show_cpuinfo(struct seq_file *m, void *v) { unsigned long n = (unsigned long) v - 1; unsigned int isa = cpu_data[n].isa_level; unsigned int version = cpu_data[n].processor_id & 0xff; unsigned int fp_version = cpu_data[n].fpu_vers; - struct proc_cpuinfo_notifier_args proc_cpuinfo_notifier_args; #ifdef CONFIG_SMP if (!cpu_online(n)) @@ -91,20 +75,13 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (cpu_has_lbt_mips) seq_printf(m, " lbt_mips"); seq_printf(m, "\n"); - seq_printf(m, "Hardware Watchpoint\t: %s", - cpu_has_watch ? "yes, " : "no\n"); + seq_printf(m, "Hardware Watchpoint\t: %s", str_yes_no(cpu_has_watch)); if (cpu_has_watch) { - seq_printf(m, "iwatch count: %d, dwatch count: %d\n", + seq_printf(m, ", iwatch count: %d, dwatch count: %d", cpu_data[n].watch_ireg_count, cpu_data[n].watch_dreg_count); } - proc_cpuinfo_notifier_args.m = m; - proc_cpuinfo_notifier_args.n = n; - - raw_notifier_call_chain(&proc_cpuinfo_chain, 0, - &proc_cpuinfo_notifier_args); - - seq_printf(m, "\n"); + seq_printf(m, "\n\n"); return 0; } diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c index 1ef8c6383535..de8fa5a8a825 100644 --- a/arch/loongarch/kernel/reset.c +++ b/arch/loongarch/kernel/reset.c @@ -33,7 +33,7 @@ void machine_halt(void) console_flush_on_panic(CONSOLE_FLUSH_PENDING); while (true) { - __arch_cpu_idle(); + __asm__ __volatile__("idle 0" : : : "memory"); } } @@ -53,7 +53,7 @@ void machine_power_off(void) #endif while (true) { - __arch_cpu_idle(); + __asm__ __volatile__("idle 0" : : : "memory"); } } @@ -74,6 +74,6 @@ void machine_restart(char *command) acpi_reboot(); while (true) { - __arch_cpu_idle(); + __asm__ __volatile__("idle 0" : : : "memory"); } } diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index bf9268bf26d5..f6d3242b9234 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -303,9 +303,9 @@ int kvm_arch_enable_virtualization_cpu(void) * TOE=0: Trap on Exception. * TIT=0: Trap on Timer. */ - if (env & CSR_GCFG_GCIP_ALL) + if (env & CSR_GCFG_GCIP_SECURE) gcfg |= CSR_GCFG_GCI_SECURE; - if (env & CSR_GCFG_MATC_ROOT) + if (env & CSR_GCFG_MATP_ROOT) gcfg |= CSR_GCFG_MATC_ROOT; write_csr_gcfg(gcfg); diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index 0c292f818492..1be185e94807 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -85,7 +85,7 @@ * Guest CRMD comes from separate GCSR_CRMD register */ ori t0, zero, CSR_PRMD_PIE - csrxchg t0, t0, LOONGARCH_CSR_PRMD + csrwr t0, LOONGARCH_CSR_PRMD /* Set PVM bit to setup ertn to guest context */ ori t0, zero, CSR_GSTAT_PVM diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index fb72095c8077..20f941af3e9e 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1548,9 +1548,6 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Restore timer state regardless */ kvm_restore_timer(vcpu); - - /* Control guest page CCA attribute */ - change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT); kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); /* Restore hardware PMU CSRs */ diff --git a/arch/loongarch/lib/csum.c b/arch/loongarch/lib/csum.c index a5e84b403c3b..df309ae4045d 100644 --- a/arch/loongarch/lib/csum.c +++ b/arch/loongarch/lib/csum.c @@ -25,7 +25,7 @@ unsigned int __no_sanitize_address do_csum(const unsigned char *buff, int len) const u64 *ptr; u64 data, sum64 = 0; - if (unlikely(len == 0)) + if (unlikely(len <= 0)) return 0; offset = (unsigned long)buff & 7; diff --git a/arch/loongarch/mm/pageattr.c b/arch/loongarch/mm/pageattr.c index bf8678248444..99165903908a 100644 --- a/arch/loongarch/mm/pageattr.c +++ b/arch/loongarch/mm/pageattr.c @@ -3,6 +3,7 @@ * Copyright (C) 2024 Loongson Technology Corporation Limited */ +#include <linux/memblock.h> #include <linux/pagewalk.h> #include <linux/pgtable.h> #include <asm/set_memory.h> @@ -167,7 +168,7 @@ bool kernel_page_present(struct page *page) unsigned long addr = (unsigned long)page_address(page); if (addr < vm_map_base) - return true; + return memblock_is_memory(__pa(addr)); pgd = pgd_offset_k(addr); if (pgd_none(pgdp_get(pgd))) diff --git a/arch/mips/boot/tools/relocs.c b/arch/mips/boot/tools/relocs.c index a88d66c46d7f..9863e1d5c62e 100644 --- a/arch/mips/boot/tools/relocs.c +++ b/arch/mips/boot/tools/relocs.c @@ -468,6 +468,8 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, const char *symname)) { int i; + struct section *extab_sec = sec_lookup("__ex_table"); + int extab_index = extab_sec ? extab_sec - secs : -1; /* Walk through the relocations */ for (i = 0; i < ehdr.e_shnum; i++) { @@ -480,6 +482,9 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel, if (sec->shdr.sh_type != SHT_REL_TYPE) continue; + if (sec->shdr.sh_info == extab_index) + continue; + sec_symtab = sec->link; sec_applies = &secs[sec->shdr.sh_info]; if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h index d0a86ce83de9..fbc71ddcf0f6 100644 --- a/arch/mips/include/asm/hugetlb.h +++ b/arch/mips/include/asm/hugetlb.h @@ -27,7 +27,8 @@ static inline int prepare_hugepage_range(struct file *file, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + unsigned long sz) { pte_t clear; pte_t pte = *ptep; @@ -42,13 +43,14 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; + unsigned long sz = huge_page_size(hstate_vma(vma)); /* * clear the huge pte entry firstly, so that the other smp threads will * not get old pte entry after finishing flush_tlb_page and before * setting new huge pte entry */ - pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz); flush_tlb_page(vma, addr); return pte; } diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index 4a2b40ce39e0..85fa9962266a 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -27,8 +27,8 @@ */ struct pt_regs { #ifdef CONFIG_32BIT - /* Pad bytes for argument save space on the stack. */ - unsigned long pad0[8]; + /* Saved syscall stack arguments; entries 0-3 unused. */ + unsigned long args[8]; #endif /* Saved main processor registers. */ diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index ebdf4d910af2..056aa1b713e2 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -57,37 +57,21 @@ static inline void mips_syscall_update_nr(struct task_struct *task, static inline void mips_get_syscall_arg(unsigned long *arg, struct task_struct *task, struct pt_regs *regs, unsigned int n) { - unsigned long usp __maybe_unused = regs->regs[29]; - +#ifdef CONFIG_32BIT switch (n) { case 0: case 1: case 2: case 3: *arg = regs->regs[4 + n]; - - return; - -#ifdef CONFIG_32BIT - case 4: case 5: case 6: case 7: - get_user(*arg, (int *)usp + n); return; -#endif - -#ifdef CONFIG_64BIT case 4: case 5: case 6: case 7: -#ifdef CONFIG_MIPS32_O32 - if (test_tsk_thread_flag(task, TIF_32BIT_REGS)) - get_user(*arg, (int *)usp + n); - else -#endif - *arg = regs->regs[4 + n]; - + *arg = regs->args[n]; return; -#endif - - default: - BUG(); } - - unreachable(); +#else + *arg = regs->regs[4 + n]; + if ((IS_ENABLED(CONFIG_MIPS32_O32) && + test_tsk_thread_flag(task, TIF_32BIT_REGS))) + *arg = (unsigned int)*arg; +#endif } static inline long syscall_get_error(struct task_struct *task, diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index cb1045ebab06..b910ec54a3a1 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -27,6 +27,12 @@ void output_ptreg_defines(void); void output_ptreg_defines(void) { COMMENT("MIPS pt_regs offsets."); +#ifdef CONFIG_32BIT + OFFSET(PT_ARG4, pt_regs, args[4]); + OFFSET(PT_ARG5, pt_regs, args[5]); + OFFSET(PT_ARG6, pt_regs, args[6]); + OFFSET(PT_ARG7, pt_regs, args[7]); +#endif OFFSET(PT_R0, pt_regs, regs[0]); OFFSET(PT_R1, pt_regs, regs[1]); OFFSET(PT_R2, pt_regs, regs[2]); diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 2c604717e630..4947a4f39e37 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -64,10 +64,10 @@ load_a6: user_lw(t7, 24(t0)) # argument #7 from usp load_a7: user_lw(t8, 28(t0)) # argument #8 from usp loads_done: - sw t5, 16(sp) # argument #5 to ksp - sw t6, 20(sp) # argument #6 to ksp - sw t7, 24(sp) # argument #7 to ksp - sw t8, 28(sp) # argument #8 to ksp + sw t5, PT_ARG4(sp) # argument #5 to ksp + sw t6, PT_ARG5(sp) # argument #6 to ksp + sw t7, PT_ARG6(sp) # argument #7 to ksp + sw t8, PT_ARG7(sp) # argument #8 to ksp .set pop .section __ex_table,"a" diff --git a/arch/parisc/include/asm/hugetlb.h b/arch/parisc/include/asm/hugetlb.h index 5b3a5429f71b..21e9ace17739 100644 --- a/arch/parisc/include/asm/hugetlb.h +++ b/arch/parisc/include/asm/hugetlb.h @@ -10,7 +10,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index e9d18cf25b79..a94fe546d434 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -126,7 +126,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) + pte_t *ptep, unsigned long sz) { pte_t entry; diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h index c3efacab4b94..aa90a048f319 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-4k.h +++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h @@ -77,9 +77,17 @@ /* * With 4K page size the real_pte machinery is all nops. */ -#define __real_pte(e, p, o) ((real_pte_t){(e)}) +static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset) +{ + return (real_pte_t){pte}; +} + #define __rpte_to_pte(r) ((r).pte) -#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT) + +static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index) +{ + return pte_val(__rpte_to_pte(rpte)) >> H_PAGE_F_GIX_SHIFT; +} #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \ do { \ diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index dad2e7980f24..86326587e58d 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -45,7 +45,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, + unsigned long sz) { return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1)); } @@ -55,8 +56,9 @@ static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; + unsigned long sz = huge_page_size(hstate_vma(vma)); - pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz); flush_hugetlb_page(vma, addr); return pte; } diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index af97fbb3c257..f84e0337cc02 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -108,7 +108,7 @@ static int text_area_cpu_up(unsigned int cpu) unsigned long addr; int err; - area = get_vm_area(PAGE_SIZE, VM_ALLOC); + area = get_vm_area(PAGE_SIZE, 0); if (!area) { WARN_ONCE(1, "Failed to create text area for cpu %d\n", cpu); @@ -493,7 +493,9 @@ static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool rep orig_mm = start_using_temp_mm(patching_mm); + kasan_disable_current(); err = __patch_instructions(patch_addr, code, len, repeat_instr); + kasan_enable_current(); /* context synchronisation performed by __patch_instructions */ stop_using_temp_mm(patching_mm, orig_mm); diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 1aa0cb097c9c..7b9a5ea9cad9 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -75,7 +75,7 @@ static void fsl_msi_print_chip(struct irq_data *irqd, struct seq_file *p) srs = (hwirq >> msi_data->srs_shift) & MSI_SRS_MASK; cascade_virq = msi_data->cascade_array[srs]->virq; - seq_printf(p, " fsl-msi-%d", cascade_virq); + seq_printf(p, "fsl-msi-%d", cascade_virq); } diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h index 4cadc56220fe..427c41dde643 100644 --- a/arch/riscv/include/asm/cmpxchg.h +++ b/arch/riscv/include/asm/cmpxchg.h @@ -231,7 +231,7 @@ __arch_cmpxchg(".w", ".w" sc_sfx, ".w" cas_sfx, \ sc_prepend, sc_append, \ cas_prepend, cas_append, \ - __ret, __ptr, (long), __old, __new); \ + __ret, __ptr, (long)(int)(long), __old, __new); \ break; \ case 8: \ __arch_cmpxchg(".d", ".d" sc_sfx, ".d" cas_sfx, \ diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h index 72be100afa23..90c86b115e00 100644 --- a/arch/riscv/include/asm/futex.h +++ b/arch/riscv/include/asm/futex.h @@ -93,7 +93,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, _ASM_EXTABLE_UACCESS_ERR(1b, 3b, %[r]) \ _ASM_EXTABLE_UACCESS_ERR(2b, 3b, %[r]) \ : [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr), [t] "=&r" (tmp) - : [ov] "Jr" (oldval), [nv] "Jr" (newval) + : [ov] "Jr" ((long)(int)oldval), [nv] "Jr" (newval) : "memory"); __disable_user_access(); diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index faf3624d8057..446126497768 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -28,7 +28,8 @@ void set_huge_pte_at(struct mm_struct *mm, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); + unsigned long addr, pte_t *ptep, + unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 2d40736fc37c..26b085dbdd07 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -108,11 +108,11 @@ int populate_cache_leaves(unsigned int cpu) if (!np) return -ENOENT; - if (of_property_read_bool(np, "cache-size")) + if (of_property_present(np, "cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) + if (of_property_present(np, "i-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) + if (of_property_present(np, "d-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); prev = np; @@ -125,11 +125,11 @@ int populate_cache_leaves(unsigned int cpu) break; if (level <= levels) break; - if (of_property_read_bool(np, "cache-size")) + if (of_property_present(np, "cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level); - if (of_property_read_bool(np, "i-cache-size")) + if (of_property_present(np, "i-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_INST, level); - if (of_property_read_bool(np, "d-cache-size")) + if (of_property_present(np, "d-cache-size")) ci_leaf_init(this_leaf++, CACHE_TYPE_DATA, level); levels = level; } diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c6ba750536c3..40ac72e407b6 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -479,7 +479,7 @@ static void __init riscv_resolve_isa(unsigned long *source_isa, if (bit < RISCV_ISA_EXT_BASE) *this_hwcap |= isa2hwcap[bit]; } - } while (loop && memcmp(prev_resolved_isa, resolved_isa, sizeof(prev_resolved_isa))); + } while (loop && !bitmap_equal(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX)); } static void __init match_isa_ext(const char *name, const char *name_end, unsigned long *bitmap) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index f1793630fc51..4fe45daa6281 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -322,8 +322,8 @@ void __init setup_arch(char **cmdline_p) riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); - init_rt_signal_env(); apply_boot_alternatives(); + init_rt_signal_env(); if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && riscv_isa_extension_available(NULL, ZICBOM)) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 94e905eea1de..08378fea3a11 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -215,12 +215,6 @@ static size_t get_rt_frame_size(bool cal_all) if (cal_all || riscv_v_vstate_query(task_pt_regs(current))) total_context_size += riscv_v_sc_size; } - /* - * Preserved a __riscv_ctx_hdr for END signal context header if an - * extension uses __riscv_extra_ext_header - */ - if (total_context_size) - total_context_size += sizeof(struct __riscv_ctx_hdr); frame_size += total_context_size; diff --git a/arch/riscv/kvm/aia_imsic.c b/arch/riscv/kvm/aia_imsic.c index a8085cd8215e..29ef9c2133a9 100644 --- a/arch/riscv/kvm/aia_imsic.c +++ b/arch/riscv/kvm/aia_imsic.c @@ -974,7 +974,6 @@ int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu, if (imsic->vsfile_cpu >= 0) { writel(iid, imsic->vsfile_va + IMSIC_MMIO_SETIPNUM_LE); - kvm_vcpu_kick(vcpu); } else { eix = &imsic->swfile->eix[iid / BITS_PER_TYPE(u64)]; set_bit(iid & (BITS_PER_TYPE(u64) - 1), eix->eip); diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index dce667f4b6ab..3070bb31745d 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -9,6 +9,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/wordpart.h> #include <asm/sbi.h> #include <asm/kvm_vcpu_sbi.h> @@ -79,12 +80,12 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!kvm_riscv_vcpu_stopped(target_vcpu)) - return SBI_HSM_STATE_STARTED; - else if (vcpu->stat.generic.blocking) + if (kvm_riscv_vcpu_stopped(target_vcpu)) + return SBI_HSM_STATE_STOPPED; + else if (target_vcpu->stat.generic.blocking) return SBI_HSM_STATE_SUSPENDED; else - return SBI_HSM_STATE_STOPPED; + return SBI_HSM_STATE_STARTED; } static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, @@ -109,7 +110,7 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, } return 0; case SBI_EXT_HSM_HART_SUSPEND: - switch (cp->a0) { + switch (lower_32_bits(cp->a0)) { case SBI_HSM_SUSPEND_RET_DEFAULT: kvm_riscv_vcpu_wfi(vcpu); break; diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 9c2ab3dfa93a..5fbf3f94f1e8 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -21,7 +21,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, u64 next_cycle; if (cp->a6 != SBI_EXT_TIME_SET_TIMER) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -51,9 +51,10 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_cpu_context *cp = &vcpu->arch.guest_context; unsigned long hmask = cp->a0; unsigned long hbase = cp->a1; + unsigned long hart_bit = 0, sentmask = 0; if (cp->a6 != SBI_EXT_IPI_SEND_IPI) { - retdata->err_val = SBI_ERR_INVALID_PARAM; + retdata->err_val = SBI_ERR_NOT_SUPPORTED; return 0; } @@ -62,15 +63,23 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, if (hbase != -1UL) { if (tmp->vcpu_id < hbase) continue; - if (!(hmask & (1UL << (tmp->vcpu_id - hbase)))) + hart_bit = tmp->vcpu_id - hbase; + if (hart_bit >= __riscv_xlen) + goto done; + if (!(hmask & (1UL << hart_bit))) continue; } ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT); if (ret < 0) break; + sentmask |= 1UL << hart_bit; kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RCVD); } +done: + if (hbase != -1UL && (hmask ^ sentmask)) + retdata->err_val = SBI_ERR_INVALID_PARAM; + return ret; } diff --git a/arch/riscv/kvm/vcpu_sbi_system.c b/arch/riscv/kvm/vcpu_sbi_system.c index 5d55e08791fa..bc0ebba89003 100644 --- a/arch/riscv/kvm/vcpu_sbi_system.c +++ b/arch/riscv/kvm/vcpu_sbi_system.c @@ -4,6 +4,7 @@ */ #include <linux/kvm_host.h> +#include <linux/wordpart.h> #include <asm/kvm_vcpu_sbi.h> #include <asm/sbi.h> @@ -19,7 +20,7 @@ static int kvm_sbi_ext_susp_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, switch (funcid) { case SBI_EXT_SUSP_SYSTEM_SUSPEND: - if (cp->a0 != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) { + if (lower_32_bits(cp->a0) != SBI_SUSP_SLEEP_TYPE_SUSPEND_TO_RAM) { retdata->err_val = SBI_ERR_INVALID_PARAM; return 0; } diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 42314f093922..b4a78a4b35cf 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -293,7 +293,7 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) + pte_t *ptep, unsigned long sz) { pte_t orig_pte = ptep_get(ptep); int pte_num; diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 885bd1dd2c82..9276e0576d0a 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -86,7 +86,7 @@ static int cmma_test_essa(void) : [reg1] "=&d" (reg1), [reg2] "=&a" (reg2), [rc] "+&d" (rc), - [tmp] "=&d" (tmp), + [tmp] "+&d" (tmp), "+Q" (get_lowcore()->program_new_psw), "=Q" (old) : [psw_old] "a" (&old), diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index d6beec5292a0..80bdfbae6e5b 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -469,6 +469,7 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set +CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -740,7 +741,6 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y -CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_USER=m # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set @@ -875,6 +875,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_GRAPH_RETVAL=y +CONFIG_FUNCTION_GRAPH_RETADDR=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 8cfbfb10bba8..449a0e996b96 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -459,6 +459,7 @@ CONFIG_SCSI_DH_ALUA=m CONFIG_MD=y CONFIG_BLK_DEV_MD=y # CONFIG_MD_BITMAP_FILE is not set +CONFIG_MD_LINEAR=m CONFIG_MD_CLUSTER=m CONFIG_BCACHE=m CONFIG_BLK_DEV_DM=y @@ -725,7 +726,6 @@ CONFIG_IMA=y CONFIG_IMA_DEFAULT_HASH_SHA256=y CONFIG_IMA_WRITE_POLICY=y CONFIG_IMA_APPRAISE=y -CONFIG_LSM="yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_CRYPTO_FIPS=y CONFIG_CRYPTO_USER=m @@ -826,6 +826,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y CONFIG_FUNCTION_GRAPH_RETVAL=y +CONFIG_FUNCTION_GRAPH_RETADDR=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index bcbaa069de96..853b2326a171 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -62,7 +62,6 @@ CONFIG_ZFCP=y # CONFIG_INOTIFY_USER is not set # CONFIG_MISC_FILESYSTEMS is not set # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_LSM="yama,loadpin,safesetid,integrity" # CONFIG_ZLIB_DFLTCC is not set CONFIG_XZ_DEC_MICROLZMA=y CONFIG_PRINTK_TIME=y diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index d5125296ade2..a5ca0a947691 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -53,7 +53,11 @@ static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsig unsigned long mask; int cc; - if (__builtin_constant_p(nr)) { + /* + * With CONFIG_PROFILE_ALL_BRANCHES enabled gcc fails to + * handle __builtin_constant_p() in some cases. + */ + if (!IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && __builtin_constant_p(nr)) { addr = (const volatile unsigned char *)ptr; addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE; mask = 1UL << (nr & (BITS_PER_BYTE - 1)); diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 13f51a6a5bb1..4e73ef46d4b2 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -23,7 +23,6 @@ /** * struct gmap_struct - guest address space * @list: list head for the mm->context gmap list - * @crst_list: list of all crst tables used in the guest address space * @mm: pointer to the parent mm_struct * @guest_to_host: radix tree with guest to host address translation * @host_to_guest: radix tree with pointer to segment table entries @@ -35,7 +34,6 @@ * @guest_handle: protected virtual machine handle for the ultravisor * @host_to_rmap: radix tree with gmap_rmap lists * @children: list of shadow gmap structures - * @pt_list: list of all page tables used in the shadow guest address space * @shadow_lock: spinlock to protect the shadow gmap list * @parent: pointer to the parent gmap for shadow guest address spaces * @orig_asce: ASCE for which the shadow page table has been created @@ -45,7 +43,6 @@ */ struct gmap { struct list_head list; - struct list_head crst_list; struct mm_struct *mm; struct radix_tree_root guest_to_host; struct radix_tree_root host_to_guest; @@ -61,7 +58,6 @@ struct gmap { /* Additional data for shadow guest address spaces */ struct radix_tree_root host_to_rmap; struct list_head children; - struct list_head pt_list; spinlock_t shadow_lock; struct gmap *parent; unsigned long orig_asce; @@ -106,23 +102,21 @@ struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit); void gmap_remove(struct gmap *gmap); struct gmap *gmap_get(struct gmap *gmap); void gmap_put(struct gmap *gmap); +void gmap_free(struct gmap *gmap); +struct gmap *gmap_alloc(unsigned long limit); int gmap_map_segment(struct gmap *gmap, unsigned long from, unsigned long to, unsigned long len); int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long gmap_translate(struct gmap *, unsigned long gaddr); int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); -int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val); -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, - int edat_level); -int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level); +void gmap_unshadow(struct gmap *sg); int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, int fake); int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, @@ -131,24 +125,22 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, int fake); int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, int fake); -int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, - unsigned long *pgt, int *dat_protection, int *fake); int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte); void gmap_register_pte_notifier(struct gmap_notifier *); void gmap_unregister_pte_notifier(struct gmap_notifier *); -int gmap_mprotect_notify(struct gmap *, unsigned long start, - unsigned long len, int prot); +int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits); void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], unsigned long gaddr, unsigned long vmaddr); int s390_disable_cow_sharing(void); -void s390_unlist_old_asce(struct gmap *gmap); int s390_replace_asce(struct gmap *gmap); void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns); int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, unsigned long end, bool interruptible); +int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split); +unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level); /** * s390_uv_destroy_range - Destroy a range of pages in the given mm. diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index 7c52acaf9f82..663e87220e89 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -25,8 +25,16 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + unsigned long sz) +{ + return __huge_ptep_get_and_clear(mm, addr, ptep); +} static inline void arch_clear_hugetlb_flags(struct folio *folio) { @@ -48,7 +56,7 @@ static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr, static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - return huge_ptep_get_and_clear(vma->vm_mm, address, ptep); + return __huge_ptep_get_and_clear(vma->vm_mm, address, ptep); } #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS @@ -59,7 +67,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte); if (changed) { - huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + __huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); } return changed; @@ -69,7 +77,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); + pte_t pte = __huge_ptep_get_and_clear(mm, addr, ptep); __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 97c7c8127543..9a367866cab0 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -30,6 +30,8 @@ #define KVM_S390_ESCA_CPU_SLOTS 248 #define KVM_MAX_VCPUS 255 +#define KVM_INTERNAL_MEM_SLOTS 1 + /* * These seem to be used for allocating ->chip in the routing table, which we * don't use. 1 is as small as we can get to reduce the needed memory. If we @@ -931,12 +933,14 @@ struct sie_page2 { u8 reserved928[0x1000 - 0x928]; /* 0x0928 */ }; +struct vsie_page; + struct kvm_s390_vsie { struct mutex mutex; struct radix_tree_root addr_to_page; int page_count; int next; - struct page *pages[KVM_MAX_VCPUS]; + struct vsie_page *pages[KVM_MAX_VCPUS]; }; struct kvm_s390_gisa_iam { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index a3b51056a177..3ca5af4cfe43 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -420,9 +420,10 @@ void setup_protection_map(void); #define PGSTE_HC_BIT 0x0020000000000000UL #define PGSTE_GR_BIT 0x0004000000000000UL #define PGSTE_GC_BIT 0x0002000000000000UL -#define PGSTE_UC_BIT 0x0000800000000000UL /* user dirty (migration) */ -#define PGSTE_IN_BIT 0x0000400000000000UL /* IPTE notify bit */ -#define PGSTE_VSIE_BIT 0x0000200000000000UL /* ref'd in a shadow table */ +#define PGSTE_ST2_MASK 0x0000ffff00000000UL +#define PGSTE_UC_BIT 0x0000000000008000UL /* user dirty (migration) */ +#define PGSTE_IN_BIT 0x0000000000004000UL /* IPTE notify bit */ +#define PGSTE_VSIE_BIT 0x0000000000002000UL /* ref'd in a shadow table */ /* Guest Page State used for virtualization */ #define _PGSTE_GPS_ZERO 0x0000000080000000UL @@ -2007,4 +2008,18 @@ extern void s390_reset_cmma(struct mm_struct *mm); #define pmd_pgtable(pmd) \ ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)) +static inline unsigned long gmap_pgste_get_pgt_addr(unsigned long *pgt) +{ + unsigned long *pgstes, res; + + pgstes = pgt + _PAGE_ENTRIES; + + res = (pgstes[0] & PGSTE_ST2_MASK) << 16; + res |= pgstes[1] & PGSTE_ST2_MASK; + res |= (pgstes[2] & PGSTE_ST2_MASK) >> 16; + res |= (pgstes[3] & PGSTE_ST2_MASK) >> 32; + + return res; +} + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index dc332609f2c3..b11f5b6d0bd1 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -628,12 +628,12 @@ static inline int is_prot_virt_host(void) } int uv_pin_shared(unsigned long paddr); -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); int uv_destroy_folio(struct folio *folio); int uv_destroy_pte(pte_t pte); int uv_convert_from_secure_pte(pte_t pte); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); +int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb); +int uv_convert_from_secure(unsigned long paddr); +int uv_convert_from_secure_folio(struct folio *folio); void setup_uv(void); diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 6f9654a191ad..9f05df2da2f7 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -19,19 +19,6 @@ #include <asm/sections.h> #include <asm/uv.h> -#if !IS_ENABLED(CONFIG_KVM) -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - return 0; -} - -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) -{ - return 0; -} -#endif - /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); @@ -159,6 +146,7 @@ int uv_destroy_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL(uv_destroy_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -175,7 +163,7 @@ int uv_destroy_pte(pte_t pte) * * @paddr: Absolute host address of page to be exported */ -static int uv_convert_from_secure(unsigned long paddr) +int uv_convert_from_secure(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, @@ -187,11 +175,12 @@ static int uv_convert_from_secure(unsigned long paddr) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure); /* * The caller must already hold a reference to the folio. */ -static int uv_convert_from_secure_folio(struct folio *folio) +int uv_convert_from_secure_folio(struct folio *folio) { int rc; @@ -206,6 +195,7 @@ static int uv_convert_from_secure_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -237,13 +227,33 @@ static int expected_folio_refs(struct folio *folio) return res; } -static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +/** + * make_folio_secure() - make a folio secure + * @folio: the folio to make secure + * @uvcb: the uvcb that describes the UVC to be used + * + * The folio @folio will be made secure if possible, @uvcb will be passed + * as-is to the UVC. + * + * Return: 0 on success; + * -EBUSY if the folio is in writeback or has too many references; + * -E2BIG if the folio is large; + * -EAGAIN if the UVC needs to be attempted again; + * -ENXIO if the address is not mapped; + * -EINVAL if the UVC failed for other reasons. + * + * Context: The caller must hold exactly one extra reference on the folio + * (it's the same logic as split_folio()) + */ +int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; + if (folio_test_large(folio)) + return -E2BIG; if (folio_test_writeback(folio)) - return -EAGAIN; - expected = expected_folio_refs(folio); + return -EBUSY; + expected = expected_folio_refs(folio) + 1; if (!folio_ref_freeze(folio, expected)) return -EBUSY; set_bit(PG_arch_1, &folio->flags); @@ -267,251 +277,7 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return -EAGAIN; return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } - -/** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) -{ - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} - -/* - * Drain LRU caches: the local one on first invocation and the ones of all - * CPUs on successive invocations. Returns "true" on the first invocation. - */ -static bool drain_lru(bool *drain_lru_called) -{ - /* - * If we have tried a local drain and the folio refcount - * still does not match our expected safe value, try with a - * system wide drain. This is needed if the pagevecs holding - * the page are on a different CPU. - */ - if (*drain_lru_called) { - lru_add_drain_all(); - /* We give up here, don't retry immediately. */ - return false; - } - /* - * We are here if the folio refcount does not match the - * expected safe value. The main culprits are usually - * pagevecs. With lru_add_drain() we drain the pagevecs - * on the local CPU so that hopefully the refcount will - * reach the expected safe value. - */ - lru_add_drain(); - *drain_lru_called = true; - /* The caller should try again immediately */ - return true; -} - -/* - * Requests the Ultravisor to make a page accessible to a guest. - * If it's brought in the first time, it will be cleared. If - * it has been exported before, it will be decrypted and integrity - * checked. - */ -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) -{ - struct vm_area_struct *vma; - bool drain_lru_called = false; - spinlock_t *ptelock; - unsigned long uaddr; - struct folio *folio; - pte_t *ptep; - int rc; - -again: - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. If - * userspace is playing dirty tricky with mapping huge pages later - * on this will result in a segmentation fault. - */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = -ENXIO; - ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); - if (!ptep) - goto out; - if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { - folio = page_folio(pte_page(*ptep)); - rc = -EAGAIN; - if (folio_test_large(folio)) { - rc = -E2BIG; - } else if (folio_trylock(folio)) { - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(PFN_PHYS(folio_pfn(folio))); - rc = make_folio_secure(folio, uvcb); - folio_unlock(folio); - } - - /* - * Once we drop the PTL, the folio may get unmapped and - * freed immediately. We need a temporary reference. - */ - if (rc == -EAGAIN || rc == -E2BIG) - folio_get(folio); - } - pte_unmap_unlock(ptep, ptelock); -out: - mmap_read_unlock(gmap->mm); - - switch (rc) { - case -E2BIG: - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - folio_put(folio); - - switch (rc) { - case 0: - /* Splitting succeeded, try again immediately. */ - goto again; - case -EAGAIN: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -EBUSY: - /* Unexpected race. */ - return -EAGAIN; - } - WARN_ON_ONCE(1); - return -ENXIO; - case -EAGAIN: - /* - * If we are here because the UVC returned busy or partial - * completion, this is just a useless check, but it is safe. - */ - folio_wait_writeback(folio); - folio_put(folio); - return -EAGAIN; - case -EBUSY: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -ENXIO: - if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) - return -EFAULT; - return -EAGAIN; - } - return rc; -} -EXPORT_SYMBOL_GPL(gmap_make_secure); - -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) -{ - struct uv_cb_cts uvcb = { - .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, - .header.len = sizeof(uvcb), - .guest_handle = gmap->guest_handle, - .gaddr = gaddr, - }; - - return gmap_make_secure(gmap, gaddr, &uvcb); -} -EXPORT_SYMBOL_GPL(gmap_convert_to_secure); - -/** - * gmap_destroy_page - Destroy a guest page. - * @gmap: the gmap of the guest - * @gaddr: the guest address to destroy - * - * An attempt will be made to destroy the given guest page. If the attempt - * fails, an attempt is made to export the page. If both attempts fail, an - * appropriate error is returned. - */ -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) -{ - struct vm_area_struct *vma; - struct folio_walk fw; - unsigned long uaddr; - struct folio *folio; - int rc; - - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Huge pages should not be able to become secure - */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = 0; - folio = folio_walk_start(&fw, vma, uaddr, 0); - if (!folio) - goto out; - /* - * See gmap_make_secure(): large folios cannot be secure. Small - * folio implies FW_LEVEL_PTE. - */ - if (folio_test_large(folio) || !pte_write(fw.pte)) - goto out_walk_end; - rc = uv_destroy_folio(folio); - /* - * Fault handlers can race; it is possible that two CPUs will fault - * on the same secure page. One CPU can destroy the page, reboot, - * re-enter secure mode and import it, while the second CPU was - * stuck at the beginning of the handler. At some point the second - * CPU will be able to progress, and it will not be able to destroy - * the page. In that case we do not want to terminate the process, - * we instead try to export the page. - */ - if (rc) - rc = uv_convert_from_secure_folio(folio); -out_walk_end: - folio_walk_end(&fw, vma); -out: - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_destroy_page); +EXPORT_SYMBOL_GPL(make_folio_secure); /* * To be called with the folio locked or with an extra reference! This will diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index 02217fb4ae10..f0ffe874adc2 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o +kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 9816b0060fbe..f6fded15633a 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -16,6 +16,7 @@ #include <asm/gmap.h> #include <asm/dat-bits.h> #include "kvm-s390.h" +#include "gmap.h" #include "gaccess.h" /* @@ -1393,6 +1394,44 @@ shadow_pgt: } /** + * shadow_pgt_lookup() - find a shadow page table + * @sg: pointer to the shadow guest address space structure + * @saddr: the address in the shadow aguest address space + * @pgt: parent gmap address of the page table to get shadowed + * @dat_protection: if the pgtable is marked as protected by dat + * @fake: pgt references contiguous guest memory block, not a pgtable + * + * Returns 0 if the shadow page table was found and -EAGAIN if the page + * table was not found. + * + * Called with sg->mm->mmap_lock in read. + */ +static int shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, unsigned long *pgt, + int *dat_protection, int *fake) +{ + unsigned long pt_index; + unsigned long *table; + struct page *page; + int rc; + + spin_lock(&sg->guest_table_lock); + table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ + if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { + /* Shadow page tables are full pages (pte+pgste) */ + page = pfn_to_page(*table >> PAGE_SHIFT); + pt_index = gmap_pgste_get_pgt_addr(page_to_virt(page)); + *pgt = pt_index & ~GMAP_SHADOW_FAKE_TABLE; + *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); + *fake = !!(pt_index & GMAP_SHADOW_FAKE_TABLE); + rc = 0; + } else { + rc = -EAGAIN; + } + spin_unlock(&sg->guest_table_lock); + return rc; +} + +/** * kvm_s390_shadow_fault - handle fault on a shadow page table * @vcpu: virtual cpu * @sg: pointer to the shadow guest address space structure @@ -1415,6 +1454,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, int dat_protection, fake; int rc; + if (KVM_BUG_ON(!gmap_is_shadow(sg), vcpu->kvm)) + return -EFAULT; + mmap_read_lock(sg->mm); /* * We don't want any guest-2 tables to change - so the parent @@ -1423,7 +1465,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, */ ipte_lock(vcpu->kvm); - rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); + rc = shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); if (rc) rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection, &fake); diff --git a/arch/s390/kvm/gmap-vsie.c b/arch/s390/kvm/gmap-vsie.c new file mode 100644 index 000000000000..a6d1dbb04c97 --- /dev/null +++ b/arch/s390/kvm/gmap-vsie.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest memory management for KVM/s390 nested VMs. + * + * Copyright IBM Corp. 2008, 2020, 2024 + * + * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * David Hildenbrand <david@redhat.com> + * Janosch Frank <frankja@linux.vnet.ibm.com> + */ + +#include <linux/compiler.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/pgtable.h> +#include <linux/pagemap.h> +#include <linux/mman.h> + +#include <asm/lowcore.h> +#include <asm/gmap.h> +#include <asm/uv.h> + +#include "kvm-s390.h" +#include "gmap.h" + +/** + * gmap_find_shadow - find a specific asce in the list of shadow tables + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * Returns the pointer to a gmap if a shadow table with the given asce is + * already available, ERR_PTR(-EAGAIN) if another one is just being created, + * otherwise NULL + * + * Context: Called with parent->shadow_lock held + */ +static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, int edat_level) +{ + struct gmap *sg; + + lockdep_assert_held(&parent->shadow_lock); + list_for_each_entry(sg, &parent->children, list) { + if (!gmap_shadow_valid(sg, asce, edat_level)) + continue; + if (!sg->initialized) + return ERR_PTR(-EAGAIN); + refcount_inc(&sg->ref_count); + return sg; + } + return NULL; +} + +/** + * gmap_shadow - create/find a shadow guest address space + * @parent: pointer to the parent gmap + * @asce: ASCE for which the shadow table is created + * @edat_level: edat level to be used for the shadow translation + * + * The pages of the top level page table referred by the asce parameter + * will be set to read-only and marked in the PGSTEs of the kvm process. + * The shadow table will be removed automatically on any change to the + * PTE mapping for the source table. + * + * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, + * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the + * parent gmap table could not be protected. + */ +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level) +{ + struct gmap *sg, *new; + unsigned long limit; + int rc; + + if (KVM_BUG_ON(parent->mm->context.allow_gmap_hpage_1m, (struct kvm *)parent->private) || + KVM_BUG_ON(gmap_is_shadow(parent), (struct kvm *)parent->private)) + return ERR_PTR(-EFAULT); + spin_lock(&parent->shadow_lock); + sg = gmap_find_shadow(parent, asce, edat_level); + spin_unlock(&parent->shadow_lock); + if (sg) + return sg; + /* Create a new shadow gmap */ + limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); + if (asce & _ASCE_REAL_SPACE) + limit = -1UL; + new = gmap_alloc(limit); + if (!new) + return ERR_PTR(-ENOMEM); + new->mm = parent->mm; + new->parent = gmap_get(parent); + new->private = parent->private; + new->orig_asce = asce; + new->edat_level = edat_level; + new->initialized = false; + spin_lock(&parent->shadow_lock); + /* Recheck if another CPU created the same shadow */ + sg = gmap_find_shadow(parent, asce, edat_level); + if (sg) { + spin_unlock(&parent->shadow_lock); + gmap_free(new); + return sg; + } + if (asce & _ASCE_REAL_SPACE) { + /* only allow one real-space gmap shadow */ + list_for_each_entry(sg, &parent->children, list) { + if (sg->orig_asce & _ASCE_REAL_SPACE) { + spin_lock(&sg->guest_table_lock); + gmap_unshadow(sg); + spin_unlock(&sg->guest_table_lock); + list_del(&sg->list); + gmap_put(sg); + break; + } + } + } + refcount_set(&new->ref_count, 2); + list_add(&new->list, &parent->children); + if (asce & _ASCE_REAL_SPACE) { + /* nothing to protect, return right away */ + new->initialized = true; + spin_unlock(&parent->shadow_lock); + return new; + } + spin_unlock(&parent->shadow_lock); + /* protect after insertion, so it will get properly invalidated */ + mmap_read_lock(parent->mm); + rc = __kvm_s390_mprotect_many(parent, asce & _ASCE_ORIGIN, + ((asce & _ASCE_TABLE_LENGTH) + 1), + PROT_READ, GMAP_NOTIFY_SHADOW); + mmap_read_unlock(parent->mm); + spin_lock(&parent->shadow_lock); + new->initialized = true; + if (rc) { + list_del(&new->list); + gmap_free(new); + new = ERR_PTR(rc); + } + spin_unlock(&parent->shadow_lock); + return new; +} diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c new file mode 100644 index 000000000000..02adf151d4de --- /dev/null +++ b/arch/s390/kvm/gmap.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Guest memory management for KVM/s390 + * + * Copyright IBM Corp. 2008, 2020, 2024 + * + * Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com> + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * David Hildenbrand <david@redhat.com> + * Janosch Frank <frankja@linux.vnet.ibm.com> + */ + +#include <linux/compiler.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/pgtable.h> +#include <linux/pagemap.h> + +#include <asm/lowcore.h> +#include <asm/gmap.h> +#include <asm/uv.h> + +#include "gmap.h" + +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + +static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb) +{ + struct folio *folio = page_folio(page); + int rc; + + /* + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. + */ + if (folio_test_hugetlb(folio)) + return -EFAULT; + if (folio_test_large(folio)) { + mmap_read_unlock(gmap->mm); + rc = kvm_s390_wiggle_split_folio(gmap->mm, folio, true); + mmap_read_lock(gmap->mm); + if (rc) + return rc; + folio = page_folio(page); + } + + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, gmap->mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = make_folio_secure(folio, uvcb); + folio_unlock(folio); + + /* + * In theory a race is possible and the folio might have become + * large again before the folio_trylock() above. In that case, no + * action is performed and -EAGAIN is returned; the callers will + * have to try again later. + * In most cases this implies running the VM again, getting the same + * exception again, and make another attempt in this function. + * This is expected to happen extremely rarely. + */ + if (rc == -E2BIG) + return -EAGAIN; + /* The folio has too many references, try to shake some off */ + if (rc == -EBUSY) { + mmap_read_unlock(gmap->mm); + kvm_s390_wiggle_split_folio(gmap->mm, folio, false); + mmap_read_lock(gmap->mm); + return -EAGAIN; + } + + return rc; +} + +/** + * gmap_make_secure() - make one guest page secure + * @gmap: the guest gmap + * @gaddr: the guest address that needs to be made secure + * @uvcb: the UVCB specifying which operation needs to be performed + * + * Context: needs to be called with kvm->srcu held. + * Return: 0 on success, < 0 in case of error (see __gmap_make_secure()). + */ +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +{ + struct kvm *kvm = gmap->private; + struct page *page; + int rc = 0; + + lockdep_assert_held(&kvm->srcu); + + page = gfn_to_page(kvm, gpa_to_gfn(gaddr)); + mmap_read_lock(gmap->mm); + if (page) + rc = __gmap_make_secure(gmap, page, uvcb); + kvm_release_page_clean(page); + mmap_read_unlock(gmap->mm); + + return rc; +} + +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) +{ + struct uv_cb_cts uvcb = { + .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, + .header.len = sizeof(uvcb), + .guest_handle = gmap->guest_handle, + .gaddr = gaddr, + }; + + return gmap_make_secure(gmap, gaddr, &uvcb); +} + +/** + * __gmap_destroy_page() - Destroy a guest page. + * @gmap: the gmap of the guest + * @page: the page to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: must be called holding the mm lock for gmap->mm + */ +static int __gmap_destroy_page(struct gmap *gmap, struct page *page) +{ + struct folio *folio = page_folio(page); + int rc; + + /* + * See gmap_make_secure(): large folios cannot be secure. Small + * folio implies FW_LEVEL_PTE. + */ + if (folio_test_large(folio)) + return -EFAULT; + + rc = uv_destroy_folio(folio); + /* + * Fault handlers can race; it is possible that two CPUs will fault + * on the same secure page. One CPU can destroy the page, reboot, + * re-enter secure mode and import it, while the second CPU was + * stuck at the beginning of the handler. At some point the second + * CPU will be able to progress, and it will not be able to destroy + * the page. In that case we do not want to terminate the process, + * we instead try to export the page. + */ + if (rc) + rc = uv_convert_from_secure_folio(folio); + + return rc; +} + +/** + * gmap_destroy_page() - Destroy a guest page. + * @gmap: the gmap of the guest + * @gaddr: the guest address to destroy + * + * An attempt will be made to destroy the given guest page. If the attempt + * fails, an attempt is made to export the page. If both attempts fail, an + * appropriate error is returned. + * + * Context: may sleep. + */ +int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) +{ + struct page *page; + int rc = 0; + + mmap_read_lock(gmap->mm); + page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr)); + if (page) + rc = __gmap_destroy_page(gmap, page); + kvm_release_page_clean(page); + mmap_read_unlock(gmap->mm); + return rc; +} diff --git a/arch/s390/kvm/gmap.h b/arch/s390/kvm/gmap.h new file mode 100644 index 000000000000..c8f031c9ea5f --- /dev/null +++ b/arch/s390/kvm/gmap.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2016, 2025 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + * Claudio Imbrenda <imbrenda@linux.ibm.com> + */ + +#ifndef ARCH_KVM_S390_GMAP_H +#define ARCH_KVM_S390_GMAP_H + +#define GMAP_SHADOW_FAKE_TABLE 1ULL + +int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); +int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); +int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); +struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level); + +/** + * gmap_shadow_valid - check if a shadow guest address space matches the + * given properties and is still valid + * @sg: pointer to the shadow guest address space structure + * @asce: ASCE for which the shadow table is requested + * @edat_level: edat level to be used for the shadow translation + * + * Returns 1 if the gmap shadow is still valid and matches the given + * properties, the caller can continue using it. Returns 0 otherwise, the + * caller has to request a new shadow gmap in this case. + * + */ +static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) +{ + if (sg->removed) + return 0; + return sg->orig_asce == asce && sg->edat_level == edat_level; +} + +#endif diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 5bbaadf75dc6..610dd44a948b 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -21,6 +21,7 @@ #include "gaccess.h" #include "trace.h" #include "trace-s390.h" +#include "gmap.h" u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu) { @@ -367,7 +368,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) reg2, &srcaddr, GACC_FETCH, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - rc = gmap_fault(vcpu->arch.gmap, srcaddr, 0); + rc = kvm_s390_handle_dat_fault(vcpu, srcaddr, 0); if (rc != 0) return rc; @@ -376,7 +377,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) reg1, &dstaddr, GACC_STORE, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); - rc = gmap_fault(vcpu->arch.gmap, dstaddr, FAULT_FLAG_WRITE); + rc = kvm_s390_handle_dat_fault(vcpu, dstaddr, FOLL_WRITE); if (rc != 0) return rc; @@ -549,7 +550,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu) * If the unpin did not succeed, the guest will exit again for the UVC * and we will retry the unpin. */ - if (rc == -EINVAL) + if (rc == -EINVAL || rc == -ENXIO) return 0; /* * If we got -EAGAIN here, we simply return it. It will eventually diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index d4f031e086fc..07ff0e10cb7f 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2893,7 +2893,8 @@ int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { - u64 uaddr; + u64 uaddr_s, uaddr_i; + int idx; switch (ue->type) { /* we store the userspace addresses instead of the guest addresses */ @@ -2901,14 +2902,16 @@ int kvm_set_routing_entry(struct kvm *kvm, if (kvm_is_ucontrol(kvm)) return -EINVAL; e->set = set_adapter_int; - uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.summary_addr); - if (uaddr == -EFAULT) - return -EFAULT; - e->adapter.summary_addr = uaddr; - uaddr = gmap_translate(kvm->arch.gmap, ue->u.adapter.ind_addr); - if (uaddr == -EFAULT) + + idx = srcu_read_lock(&kvm->srcu); + uaddr_s = gpa_to_hva(kvm, ue->u.adapter.summary_addr); + uaddr_i = gpa_to_hva(kvm, ue->u.adapter.ind_addr); + srcu_read_unlock(&kvm->srcu, idx); + + if (kvm_is_error_hva(uaddr_s) || kvm_is_error_hva(uaddr_i)) return -EFAULT; - e->adapter.ind_addr = uaddr; + e->adapter.summary_addr = uaddr_s; + e->adapter.ind_addr = uaddr_i; e->adapter.summary_offset = ue->u.adapter.summary_offset; e->adapter.ind_offset = ue->u.adapter.ind_offset; e->adapter.adapter_id = ue->u.adapter.adapter_id; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d8080c27d45b..ebecb96bacce 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -50,6 +50,7 @@ #include "kvm-s390.h" #include "gaccess.h" #include "pci.h" +#include "gmap.h" #define CREATE_TRACE_POINTS #include "trace.h" @@ -3428,8 +3429,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) VM_EVENT(kvm, 3, "vm created with type %lu", type); if (type & KVM_VM_S390_UCONTROL) { + struct kvm_userspace_memory_region2 fake_memslot = { + .slot = KVM_S390_UCONTROL_MEMSLOT, + .guest_phys_addr = 0, + .userspace_addr = 0, + .memory_size = ALIGN_DOWN(TASK_SIZE, _SEGMENT_SIZE), + .flags = 0, + }; + kvm->arch.gmap = NULL; kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; + /* one flat fake memslot covering the whole address-space */ + mutex_lock(&kvm->slots_lock); + KVM_BUG_ON(kvm_set_internal_memslot(kvm, &fake_memslot), kvm); + mutex_unlock(&kvm->slots_lock); } else { if (sclp.hamax == U64_MAX) kvm->arch.mem_limit = TASK_SIZE_MAX; @@ -4498,6 +4511,75 @@ static bool ibs_enabled(struct kvm_vcpu *vcpu) return kvm_s390_test_cpuflags(vcpu, CPUSTAT_IBS); } +static int __kvm_s390_fixup_fault_sync(struct gmap *gmap, gpa_t gaddr, unsigned int flags) +{ + struct kvm *kvm = gmap->private; + gfn_t gfn = gpa_to_gfn(gaddr); + bool unlocked; + hva_t vmaddr; + gpa_t tmp; + int rc; + + if (kvm_is_ucontrol(kvm)) { + tmp = __gmap_translate(gmap, gaddr); + gfn = gpa_to_gfn(tmp); + } + + vmaddr = gfn_to_hva(kvm, gfn); + rc = fixup_user_fault(gmap->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!rc) + rc = __gmap_link(gmap, gaddr, vmaddr); + return rc; +} + +/** + * __kvm_s390_mprotect_many() - Apply specified protection to guest pages + * @gmap: the gmap of the guest + * @gpa: the starting guest address + * @npages: how many pages to protect + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE + * @bits: pgste notification bits to set + * + * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one() + * + * Context: kvm->srcu and gmap->mm need to be held in read mode + */ +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits) +{ + unsigned int fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0; + gpa_t end = gpa + npages * PAGE_SIZE; + int rc; + + for (; gpa < end; gpa = ALIGN(gpa + 1, rc)) { + rc = gmap_protect_one(gmap, gpa, prot, bits); + if (rc == -EAGAIN) { + __kvm_s390_fixup_fault_sync(gmap, gpa, fault_flag); + rc = gmap_protect_one(gmap, gpa, prot, bits); + } + if (rc < 0) + return rc; + } + + return 0; +} + +static int kvm_s390_mprotect_notify_prefix(struct kvm_vcpu *vcpu) +{ + gpa_t gaddr = kvm_s390_get_prefix(vcpu); + int idx, rc; + + idx = srcu_read_lock(&vcpu->kvm->srcu); + mmap_read_lock(vcpu->arch.gmap->mm); + + rc = __kvm_s390_mprotect_many(vcpu->arch.gmap, gaddr, 2, PROT_WRITE, GMAP_NOTIFY_MPROT); + + mmap_read_unlock(vcpu->arch.gmap->mm); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + + return rc; +} + static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { retry: @@ -4513,9 +4595,8 @@ retry: */ if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { int rc; - rc = gmap_mprotect_notify(vcpu->arch.gmap, - kvm_s390_get_prefix(vcpu), - PAGE_SIZE * 2, PROT_WRITE); + + rc = kvm_s390_mprotect_notify_prefix(vcpu); if (rc) { kvm_make_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu); return rc; @@ -4766,11 +4847,111 @@ static int vcpu_post_run_addressing_exception(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_irq(vcpu, &pgm_info); } +static void kvm_s390_assert_primary_as(struct kvm_vcpu *vcpu) +{ + KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, + "Unexpected program interrupt 0x%x, TEID 0x%016lx", + current->thread.gmap_int_code, current->thread.gmap_teid.val); +} + +/* + * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu + * @vcpu: the vCPU whose gmap is to be fixed up + * @gfn: the guest frame number used for memslots (including fake memslots) + * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps + * @flags: FOLL_* flags + * + * Return: 0 on success, < 0 in case of error. + * Context: The mm lock must not be held before calling. May sleep. + */ +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags) +{ + struct kvm_memory_slot *slot; + unsigned int fault_flags; + bool writable, unlocked; + unsigned long vmaddr; + struct page *page; + kvm_pfn_t pfn; + int rc; + + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); + if (!slot || slot->flags & KVM_MEMSLOT_INVALID) + return vcpu_post_run_addressing_exception(vcpu); + + fault_flags = flags & FOLL_WRITE ? FAULT_FLAG_WRITE : 0; + if (vcpu->arch.gmap->pfault_enabled) + flags |= FOLL_NOWAIT; + vmaddr = __gfn_to_hva_memslot(slot, gfn); + +try_again: + pfn = __kvm_faultin_pfn(slot, gfn, flags, &writable, &page); + + /* Access outside memory, inject addressing exception */ + if (is_noslot_pfn(pfn)) + return vcpu_post_run_addressing_exception(vcpu); + /* Signal pending: try again */ + if (pfn == KVM_PFN_ERR_SIGPENDING) + return -EAGAIN; + + /* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */ + if (pfn == KVM_PFN_ERR_NEEDS_IO) { + trace_kvm_s390_major_guest_pfault(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + vcpu->stat.pfault_sync++; + /* Could not setup async pfault, try again synchronously */ + flags &= ~FOLL_NOWAIT; + goto try_again; + } + /* Any other error */ + if (is_error_pfn(pfn)) + return -EFAULT; + + /* Success */ + mmap_read_lock(vcpu->arch.gmap->mm); + /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */ + rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked); + if (!rc) + rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr); + scoped_guard(spinlock, &vcpu->kvm->mmu_lock) { + kvm_release_faultin_page(vcpu->kvm, page, false, writable); + } + mmap_read_unlock(vcpu->arch.gmap->mm); + return rc; +} + +static int vcpu_dat_fault_handler(struct kvm_vcpu *vcpu, unsigned long gaddr, unsigned int flags) +{ + unsigned long gaddr_tmp; + gfn_t gfn; + + gfn = gpa_to_gfn(gaddr); + if (kvm_is_ucontrol(vcpu->kvm)) { + /* + * This translates the per-vCPU guest address into a + * fake guest address, which can then be used with the + * fake memslots that are identity mapping userspace. + * This allows ucontrol VMs to use the normal fault + * resolution path, like normal VMs. + */ + mmap_read_lock(vcpu->arch.gmap->mm); + gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr); + mmap_read_unlock(vcpu->arch.gmap->mm); + if (gaddr_tmp == -EFAULT) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = gaddr; + vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION; + return -EREMOTE; + } + gfn = gpa_to_gfn(gaddr_tmp); + } + return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, flags); +} + static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) { unsigned int flags = 0; unsigned long gaddr; - int rc = 0; gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write()) @@ -4781,9 +4962,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) vcpu->stat.exit_null++; break; case PGM_NON_SECURE_STORAGE_ACCESS: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); + kvm_s390_assert_primary_as(vcpu); /* * This is normal operation; a page belonging to a protected * guest has not been imported yet. Try to import the page into @@ -4794,9 +4973,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) break; case PGM_SECURE_STORAGE_ACCESS: case PGM_SECURE_STORAGE_VIOLATION: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); + kvm_s390_assert_primary_as(vcpu); /* * This can happen after a reboot with asynchronous teardown; * the new guest (normal or protected) will run on top of the @@ -4825,40 +5002,15 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu) case PGM_REGION_FIRST_TRANS: case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: - KVM_BUG(current->thread.gmap_teid.as != PSW_BITS_AS_PRIMARY, vcpu->kvm, - "Unexpected program interrupt 0x%x, TEID 0x%016lx", - current->thread.gmap_int_code, current->thread.gmap_teid.val); - if (vcpu->arch.gmap->pfault_enabled) { - rc = gmap_fault(vcpu->arch.gmap, gaddr, flags | FAULT_FLAG_RETRY_NOWAIT); - if (rc == -EFAULT) - return vcpu_post_run_addressing_exception(vcpu); - if (rc == -EAGAIN) { - trace_kvm_s390_major_guest_pfault(vcpu); - if (kvm_arch_setup_async_pf(vcpu)) - return 0; - vcpu->stat.pfault_sync++; - } else { - return rc; - } - } - rc = gmap_fault(vcpu->arch.gmap, gaddr, flags); - if (rc == -EFAULT) { - if (kvm_is_ucontrol(vcpu->kvm)) { - vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; - vcpu->run->s390_ucontrol.trans_exc_code = gaddr; - vcpu->run->s390_ucontrol.pgm_code = 0x10; - return -EREMOTE; - } - return vcpu_post_run_addressing_exception(vcpu); - } - break; + kvm_s390_assert_primary_as(vcpu); + return vcpu_dat_fault_handler(vcpu, gaddr, flags); default: KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx", current->thread.gmap_int_code, current->thread.gmap_teid.val); send_sig(SIGSEGV, current, 0); break; } - return rc; + return 0; } static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) @@ -5737,7 +5889,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } #endif case KVM_S390_VCPU_FAULT: { - r = gmap_fault(vcpu->arch.gmap, arg, 0); + idx = srcu_read_lock(&vcpu->kvm->srcu); + r = vcpu_dat_fault_handler(vcpu, arg, 0); + srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } case KVM_ENABLE_CAP: @@ -5853,7 +6007,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, { gpa_t size; - if (kvm_is_ucontrol(kvm)) + if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS) return -EINVAL; /* When we are protected, we should not change the memory slots */ @@ -5905,6 +6059,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, { int rc = 0; + if (kvm_is_ucontrol(kvm)) + return; + switch (change) { case KVM_MR_DELETE: rc = gmap_unmap_segment(kvm->arch.gmap, old->base_gfn * PAGE_SIZE, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 597d7a71deeb..8d3bbb2dd8d2 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -20,6 +20,8 @@ #include <asm/processor.h> #include <asm/sclp.h> +#define KVM_S390_UCONTROL_MEMSLOT (KVM_USER_MEM_SLOTS + 0) + static inline void kvm_s390_fpu_store(struct kvm_run *run) { fpu_stfpc(&run->s.regs.fpc); @@ -279,6 +281,15 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) return gd; } +static inline hva_t gpa_to_hva(struct kvm *kvm, gpa_t gpa) +{ + hva_t hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); + + if (!kvm_is_error_hva(hva)) + hva |= offset_in_page(gpa); + return hva; +} + /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); @@ -408,6 +419,14 @@ void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu); void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm); __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu); int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc); +int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsigned int flags); +int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsigned int prot, + unsigned long bits); + +static inline int kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gpa_t gaddr, unsigned int flags) +{ + return __kvm_s390_handle_dat_fault(vcpu, gpa_to_gfn(gaddr), gaddr, flags); +} /* implemented in diag.c */ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 75e81ba26d04..22c012aa5206 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -17,6 +17,7 @@ #include <linux/sched/mm.h> #include <linux/mmu_notifier.h> #include "kvm-s390.h" +#include "gmap.h" bool kvm_s390_pv_is_protected(struct kvm *kvm) { @@ -638,10 +639,28 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak, .tweak[1] = offset, }; int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb); + unsigned long vmaddr; + bool unlocked; *rc = uvcb.header.rc; *rrc = uvcb.header.rrc; + if (ret == -ENXIO) { + mmap_read_lock(kvm->mm); + vmaddr = gfn_to_hva(kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(vmaddr)) { + ret = -EFAULT; + } else { + ret = fixup_user_fault(kvm->mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); + if (!ret) + ret = __gmap_link(kvm->arch.gmap, addr, vmaddr); + } + mmap_read_unlock(kvm->mm); + if (!ret) + return -EAGAIN; + return ret; + } + if (ret && ret != -EAGAIN) KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x", uvcb.gaddr, *rc, *rrc); @@ -660,6 +679,8 @@ int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size, KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx", addr, size); + guard(srcu)(&kvm->srcu); + while (offset < size) { ret = unpack_one(kvm, addr, tweak, offset, rc, rrc); if (ret == -EAGAIN) { diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index a687695d8f68..a78df3a4f353 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -13,6 +13,7 @@ #include <linux/bitmap.h> #include <linux/sched/signal.h> #include <linux/io.h> +#include <linux/mman.h> #include <asm/gmap.h> #include <asm/mmu_context.h> @@ -22,6 +23,11 @@ #include <asm/facility.h> #include "kvm-s390.h" #include "gaccess.h" +#include "gmap.h" + +enum vsie_page_flags { + VSIE_PAGE_IN_USE = 0, +}; struct vsie_page { struct kvm_s390_sie_block scb_s; /* 0x0000 */ @@ -46,7 +52,18 @@ struct vsie_page { gpa_t gvrd_gpa; /* 0x0240 */ gpa_t riccbd_gpa; /* 0x0248 */ gpa_t sdnx_gpa; /* 0x0250 */ - __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */ + /* + * guest address of the original SCB. Remains set for free vsie + * pages, so we can properly look them up in our addr_to_page + * radix tree. + */ + gpa_t scb_gpa; /* 0x0258 */ + /* + * Flags: must be set/cleared atomically after the vsie page can be + * looked up by other CPUs. + */ + unsigned long flags; /* 0x0260 */ + __u8 reserved[0x0700 - 0x0268]; /* 0x0268 */ struct kvm_s390_crypto_cb crycb; /* 0x0700 */ __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ }; @@ -584,7 +601,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, struct kvm *kvm = gmap->private; struct vsie_page *cur; unsigned long prefix; - struct page *page; int i; if (!gmap_is_shadow(gmap)) @@ -594,10 +610,9 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, * therefore we can safely reference them all the time. */ for (i = 0; i < kvm->arch.vsie.page_count; i++) { - page = READ_ONCE(kvm->arch.vsie.pages[i]); - if (!page) + cur = READ_ONCE(kvm->arch.vsie.pages[i]); + if (!cur) continue; - cur = page_to_virt(page); if (READ_ONCE(cur->gmap) != gmap) continue; prefix = cur->scb_s.prefix << GUEST_PREFIX_SHIFT; @@ -1345,6 +1360,20 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) return rc; } +/* Try getting a given vsie page, returning "true" on success. */ +static inline bool try_get_vsie_page(struct vsie_page *vsie_page) +{ + if (test_bit(VSIE_PAGE_IN_USE, &vsie_page->flags)) + return false; + return !test_and_set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); +} + +/* Put a vsie page acquired through get_vsie_page / try_get_vsie_page. */ +static void put_vsie_page(struct vsie_page *vsie_page) +{ + clear_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); +} + /* * Get or create a vsie page for a scb address. * @@ -1355,16 +1384,21 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) { struct vsie_page *vsie_page; - struct page *page; int nr_vcpus; rcu_read_lock(); - page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); + vsie_page = radix_tree_lookup(&kvm->arch.vsie.addr_to_page, addr >> 9); rcu_read_unlock(); - if (page) { - if (page_ref_inc_return(page) == 2) - return page_to_virt(page); - page_ref_dec(page); + if (vsie_page) { + if (try_get_vsie_page(vsie_page)) { + if (vsie_page->scb_gpa == addr) + return vsie_page; + /* + * We raced with someone reusing + putting this vsie + * page before we grabbed it. + */ + put_vsie_page(vsie_page); + } } /* @@ -1375,36 +1409,40 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) mutex_lock(&kvm->arch.vsie.mutex); if (kvm->arch.vsie.page_count < nr_vcpus) { - page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); - if (!page) { + vsie_page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO | GFP_DMA); + if (!vsie_page) { mutex_unlock(&kvm->arch.vsie.mutex); return ERR_PTR(-ENOMEM); } - page_ref_inc(page); - kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = page; + __set_bit(VSIE_PAGE_IN_USE, &vsie_page->flags); + kvm->arch.vsie.pages[kvm->arch.vsie.page_count] = vsie_page; kvm->arch.vsie.page_count++; } else { /* reuse an existing entry that belongs to nobody */ while (true) { - page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; - if (page_ref_inc_return(page) == 2) + vsie_page = kvm->arch.vsie.pages[kvm->arch.vsie.next]; + if (try_get_vsie_page(vsie_page)) break; - page_ref_dec(page); kvm->arch.vsie.next++; kvm->arch.vsie.next %= nr_vcpus; } - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); + if (vsie_page->scb_gpa != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + vsie_page->scb_gpa >> 9); } - page->index = addr; - /* double use of the same address */ - if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, page)) { - page_ref_dec(page); + /* Mark it as invalid until it resides in the tree. */ + vsie_page->scb_gpa = ULONG_MAX; + + /* Double use of the same address or allocation failure. */ + if (radix_tree_insert(&kvm->arch.vsie.addr_to_page, addr >> 9, + vsie_page)) { + put_vsie_page(vsie_page); mutex_unlock(&kvm->arch.vsie.mutex); return NULL; } + vsie_page->scb_gpa = addr; mutex_unlock(&kvm->arch.vsie.mutex); - vsie_page = page_to_virt(page); memset(&vsie_page->scb_s, 0, sizeof(struct kvm_s390_sie_block)); release_gmap_shadow(vsie_page); vsie_page->fault_addr = 0; @@ -1412,14 +1450,6 @@ static struct vsie_page *get_vsie_page(struct kvm *kvm, unsigned long addr) return vsie_page; } -/* put a vsie page acquired via get_vsie_page */ -static void put_vsie_page(struct kvm *kvm, struct vsie_page *vsie_page) -{ - struct page *page = pfn_to_page(__pa(vsie_page) >> PAGE_SHIFT); - - page_ref_dec(page); -} - int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu) { struct vsie_page *vsie_page; @@ -1470,7 +1500,7 @@ out_unshadow: out_unpin_scb: unpin_scb(vcpu, vsie_page, scb_addr); out_put: - put_vsie_page(vcpu->kvm, vsie_page); + put_vsie_page(vsie_page); return rc < 0 ? rc : 0; } @@ -1486,18 +1516,18 @@ void kvm_s390_vsie_init(struct kvm *kvm) void kvm_s390_vsie_destroy(struct kvm *kvm) { struct vsie_page *vsie_page; - struct page *page; int i; mutex_lock(&kvm->arch.vsie.mutex); for (i = 0; i < kvm->arch.vsie.page_count; i++) { - page = kvm->arch.vsie.pages[i]; + vsie_page = kvm->arch.vsie.pages[i]; kvm->arch.vsie.pages[i] = NULL; - vsie_page = page_to_virt(page); release_gmap_shadow(vsie_page); /* free the radix tree entry */ - radix_tree_delete(&kvm->arch.vsie.addr_to_page, page->index >> 9); - __free_page(page); + if (vsie_page->scb_gpa != ULONG_MAX) + radix_tree_delete(&kvm->arch.vsie.addr_to_page, + vsie_page->scb_gpa >> 9); + free_page((unsigned long)vsie_page); } kvm->arch.vsie.page_count = 0; mutex_unlock(&kvm->arch.vsie.mutex); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 16b8a36c56de..94d927785800 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -24,6 +24,16 @@ #include <asm/page.h> #include <asm/tlb.h> +/* + * The address is saved in a radix tree directly; NULL would be ambiguous, + * since 0 is a valid address, and NULL is returned when nothing was found. + * The lower bits are ignored by all users of the macro, so it can be used + * to distinguish a valid address 0 from a NULL. + */ +#define VALID_GADDR_FLAG 1 +#define IS_GADDR_VALID(gaddr) ((gaddr) & VALID_GADDR_FLAG) +#define MAKE_VALID_GADDR(gaddr) (((gaddr) & HPAGE_MASK) | VALID_GADDR_FLAG) + #define GMAP_SHADOW_FAKE_TABLE 1ULL static struct page *gmap_alloc_crst(void) @@ -43,7 +53,7 @@ static struct page *gmap_alloc_crst(void) * * Returns a guest address space structure. */ -static struct gmap *gmap_alloc(unsigned long limit) +struct gmap *gmap_alloc(unsigned long limit) { struct gmap *gmap; struct page *page; @@ -70,9 +80,7 @@ static struct gmap *gmap_alloc(unsigned long limit) gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT); if (!gmap) goto out; - INIT_LIST_HEAD(&gmap->crst_list); INIT_LIST_HEAD(&gmap->children); - INIT_LIST_HEAD(&gmap->pt_list); INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT); INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT); INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT); @@ -82,8 +90,6 @@ static struct gmap *gmap_alloc(unsigned long limit) page = gmap_alloc_crst(); if (!page) goto out_free; - page->index = 0; - list_add(&page->lru, &gmap->crst_list); table = page_to_virt(page); crst_table_init(table, etype); gmap->table = table; @@ -97,6 +103,7 @@ out_free: out: return NULL; } +EXPORT_SYMBOL_GPL(gmap_alloc); /** * gmap_create - create a guest address space @@ -185,32 +192,46 @@ static void gmap_rmap_radix_tree_free(struct radix_tree_root *root) } while (nr > 0); } +static void gmap_free_crst(unsigned long *table, bool free_ptes) +{ + bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0; + int i; + + if (is_segment) { + if (!free_ptes) + goto out; + for (i = 0; i < _CRST_ENTRIES; i++) + if (!(table[i] & _SEGMENT_ENTRY_INVALID)) + page_table_free_pgste(page_ptdesc(phys_to_page(table[i]))); + } else { + for (i = 0; i < _CRST_ENTRIES; i++) + if (!(table[i] & _REGION_ENTRY_INVALID)) + gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes); + } + +out: + free_pages((unsigned long)table, CRST_ALLOC_ORDER); +} + /** * gmap_free - free a guest address space * @gmap: pointer to the guest address space structure * * No locks required. There are no references to this gmap anymore. */ -static void gmap_free(struct gmap *gmap) +void gmap_free(struct gmap *gmap) { - struct page *page, *next; - /* Flush tlb of all gmaps (if not already done for shadows) */ if (!(gmap_is_shadow(gmap) && gmap->removed)) gmap_flush_tlb(gmap); /* Free all segment & region tables. */ - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, CRST_ALLOC_ORDER); + gmap_free_crst(gmap->table, gmap_is_shadow(gmap)); + gmap_radix_tree_free(&gmap->guest_to_host); gmap_radix_tree_free(&gmap->host_to_guest); /* Free additional data for a shadow gmap */ if (gmap_is_shadow(gmap)) { - struct ptdesc *ptdesc, *n; - - /* Free all page tables. */ - list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list) - page_table_free_pgste(ptdesc); gmap_rmap_radix_tree_free(&gmap->host_to_rmap); /* Release reference to the parent */ gmap_put(gmap->parent); @@ -218,6 +239,7 @@ static void gmap_free(struct gmap *gmap) kfree(gmap); } +EXPORT_SYMBOL_GPL(gmap_free); /** * gmap_get - increase reference counter for guest address space @@ -298,10 +320,8 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, crst_table_init(new, init); spin_lock(&gmap->guest_table_lock); if (*table & _REGION_ENTRY_INVALID) { - list_add(&page->lru, &gmap->crst_list); *table = __pa(new) | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); - page->index = gaddr; page = NULL; } spin_unlock(&gmap->guest_table_lock); @@ -310,21 +330,23 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, return 0; } -/** - * __gmap_segment_gaddr - find virtual address from segment pointer - * @entry: pointer to a segment table entry in the guest address space - * - * Returns the virtual address in the guest address space for the segment - */ -static unsigned long __gmap_segment_gaddr(unsigned long *entry) +static unsigned long host_to_guest_lookup(struct gmap *gmap, unsigned long vmaddr) { - struct page *page; - unsigned long offset; + return (unsigned long)radix_tree_lookup(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); +} - offset = (unsigned long) entry / sizeof(unsigned long); - offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - page = pmd_pgtable_page((pmd_t *) entry); - return page->index + offset; +static unsigned long host_to_guest_delete(struct gmap *gmap, unsigned long vmaddr) +{ + return (unsigned long)radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); +} + +static pmd_t *host_to_guest_pmd_delete(struct gmap *gmap, unsigned long vmaddr, + unsigned long *gaddr) +{ + *gaddr = host_to_guest_delete(gmap, vmaddr); + if (IS_GADDR_VALID(*gaddr)) + return (pmd_t *)gmap_table_walk(gmap, *gaddr, 1); + return NULL; } /** @@ -336,16 +358,19 @@ static unsigned long __gmap_segment_gaddr(unsigned long *entry) */ static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) { - unsigned long *entry; + unsigned long gaddr; int flush = 0; + pmd_t *pmdp; BUG_ON(gmap_is_shadow(gmap)); spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); - if (entry) { - flush = (*entry != _SEGMENT_ENTRY_EMPTY); - *entry = _SEGMENT_ENTRY_EMPTY; + + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { + flush = (pmd_val(*pmdp) != _SEGMENT_ENTRY_EMPTY); + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } + spin_unlock(&gmap->guest_table_lock); return flush; } @@ -464,26 +489,6 @@ unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) EXPORT_SYMBOL_GPL(__gmap_translate); /** - * gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - */ -unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long rc; - - mmap_read_lock(gmap->mm); - rc = __gmap_translate(gmap, gaddr); - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_translate); - -/** * gmap_unlink - disconnect a page table from the gmap shadow tables * @mm: pointer to the parent mm_struct * @table: pointer to the host page table @@ -582,7 +587,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) spin_lock(&gmap->guest_table_lock); if (*table == _SEGMENT_ENTRY_EMPTY) { rc = radix_tree_insert(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT, table); + vmaddr >> PMD_SHIFT, + (void *)MAKE_VALID_GADDR(gaddr)); if (!rc) { if (pmd_leaf(*pmd)) { *table = (pmd_val(*pmd) & @@ -605,130 +611,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) radix_tree_preload_end(); return rc; } - -/** - * fixup_user_fault_nowait - manually resolve a user page fault without waiting - * @mm: mm_struct of target mm - * @address: user address - * @fault_flags:flags to pass down to handle_mm_fault() - * @unlocked: did we unlock the mmap_lock while retrying - * - * This function behaves similarly to fixup_user_fault(), but it guarantees - * that the fault will be resolved without waiting. The function might drop - * and re-acquire the mm lock, in which case @unlocked will be set to true. - * - * The guarantee is that the fault is handled without waiting, but the - * function itself might sleep, due to the lock. - * - * Context: Needs to be called with mm->mmap_lock held in read mode, and will - * return with the lock held in read mode; @unlocked will indicate whether - * the lock has been dropped and re-acquired. This is the same behaviour as - * fixup_user_fault(). - * - * Return: 0 on success, -EAGAIN if the fault cannot be resolved without - * waiting, -EFAULT if the fault cannot be resolved, -ENOMEM if out of - * memory. - */ -static int fixup_user_fault_nowait(struct mm_struct *mm, unsigned long address, - unsigned int fault_flags, bool *unlocked) -{ - struct vm_area_struct *vma; - unsigned int test_flags; - vm_fault_t fault; - int rc; - - fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT; - test_flags = fault_flags & FAULT_FLAG_WRITE ? VM_WRITE : VM_READ; - - vma = find_vma(mm, address); - if (unlikely(!vma || address < vma->vm_start)) - return -EFAULT; - if (unlikely(!(vma->vm_flags & test_flags))) - return -EFAULT; - - fault = handle_mm_fault(vma, address, fault_flags, NULL); - /* the mm lock has been dropped, take it again */ - if (fault & VM_FAULT_COMPLETED) { - *unlocked = true; - mmap_read_lock(mm); - return 0; - } - /* the mm lock has not been dropped */ - if (fault & VM_FAULT_ERROR) { - rc = vm_fault_to_errno(fault, 0); - BUG_ON(!rc); - return rc; - } - /* the mm lock has not been dropped because of FAULT_FLAG_RETRY_NOWAIT */ - if (fault & VM_FAULT_RETRY) - return -EAGAIN; - /* nothing needed to be done and the mm lock has not been dropped */ - return 0; -} - -/** - * __gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Context: Needs to be called with mm->mmap_lock held in read mode. Might - * drop and re-acquire the lock. Will always return with the lock held. - */ -static int __gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags) -{ - unsigned long vmaddr; - bool unlocked; - int rc = 0; - -retry: - unlocked = false; - - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) - return vmaddr; - - if (fault_flags & FAULT_FLAG_RETRY_NOWAIT) - rc = fixup_user_fault_nowait(gmap->mm, vmaddr, fault_flags, &unlocked); - else - rc = fixup_user_fault(gmap->mm, vmaddr, fault_flags, &unlocked); - if (rc) - return rc; - /* - * In the case that fixup_user_fault unlocked the mmap_lock during - * fault-in, redo __gmap_translate() to avoid racing with a - * map/unmap_segment. - * In particular, __gmap_translate(), fixup_user_fault{,_nowait}(), - * and __gmap_link() must all be called atomically in one go; if the - * lock had been dropped in between, a retry is needed. - */ - if (unlocked) - goto retry; - - return __gmap_link(gmap, gaddr, vmaddr); -} - -/** - * gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Returns 0 on success, -ENOMEM for out of memory conditions, -EFAULT if the - * vm address is already mapped to a different guest segment, and -EAGAIN if - * FAULT_FLAG_RETRY_NOWAIT was specified and the fault could not be processed - * immediately. - */ -int gmap_fault(struct gmap *gmap, unsigned long gaddr, unsigned int fault_flags) -{ - int rc; - - mmap_read_lock(gmap->mm); - rc = __gmap_fault(gmap, gaddr, fault_flags); - mmap_read_unlock(gmap->mm); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_fault); +EXPORT_SYMBOL(__gmap_link); /* * this function is assumed to be called with mmap_lock held @@ -853,8 +736,7 @@ static void gmap_call_notifier(struct gmap *gmap, unsigned long start, * * Note: Can also be called for shadow gmaps. */ -static inline unsigned long *gmap_table_walk(struct gmap *gmap, - unsigned long gaddr, int level) +unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { const int asce_type = gmap->asce & _ASCE_TYPE_MASK; unsigned long *table = gmap->table; @@ -905,6 +787,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, } return table; } +EXPORT_SYMBOL(gmap_table_walk); /** * gmap_pte_op_walk - walk the gmap page table, get the page table lock @@ -1101,86 +984,40 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE * @bits: pgste notification bits to set * - * Returns 0 if successfully protected, -ENOMEM if out of memory and - * -EFAULT if gaddr is invalid (or mapping for shadows is missing). + * Returns: + * PAGE_SIZE if a small page was successfully protected; + * HPAGE_SIZE if a large page was successfully protected; + * -ENOMEM if out of memory; + * -EFAULT if gaddr is invalid (or mapping for shadows is missing); + * -EAGAIN if the guest mapping is missing and should be fixed by the caller. * - * Called with sg->mm->mmap_lock in read. + * Context: Called with sg->mm->mmap_lock in read. */ -static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, - unsigned long len, int prot, unsigned long bits) +int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned long bits) { - unsigned long vmaddr, dist; pmd_t *pmdp; - int rc; + int rc = 0; BUG_ON(gmap_is_shadow(gmap)); - while (len) { - rc = -EAGAIN; - pmdp = gmap_pmd_op_walk(gmap, gaddr); - if (pmdp) { - if (!pmd_leaf(*pmdp)) { - rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - len -= PAGE_SIZE; - gaddr += PAGE_SIZE; - } - } else { - rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, - bits); - if (!rc) { - dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK); - len = len < dist ? 0 : len - dist; - gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; - } - } - gmap_pmd_op_end(gmap, pmdp); - } - if (rc) { - if (rc == -EINVAL) - return rc; - /* -EAGAIN, fixup of userspace mm and gmap */ - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) - return vmaddr; - rc = gmap_pte_op_fixup(gmap, gaddr, vmaddr, prot); - if (rc) - return rc; - } - } - return 0; -} + pmdp = gmap_pmd_op_walk(gmap, gaddr); + if (!pmdp) + return -EAGAIN; -/** - * gmap_mprotect_notify - change access rights for a range of ptes and - * call the notifier if any pte changes again - * @gmap: pointer to guest mapping meta data structure - * @gaddr: virtual address in the guest address space - * @len: size of area - * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE - * - * Returns 0 if for each page in the given range a gmap mapping exists, - * the new access rights could be set and the notifier could be armed. - * If the gmap mapping is missing for one or more pages -EFAULT is - * returned. If no memory could be allocated -ENOMEM is returned. - * This function establishes missing page table entries. - */ -int gmap_mprotect_notify(struct gmap *gmap, unsigned long gaddr, - unsigned long len, int prot) -{ - int rc; + if (!pmd_leaf(*pmdp)) { + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, bits); + if (!rc) + rc = PAGE_SIZE; + } else { + rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, bits); + if (!rc) + rc = HPAGE_SIZE; + } + gmap_pmd_op_end(gmap, pmdp); - if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK) || gmap_is_shadow(gmap)) - return -EINVAL; - if (!MACHINE_HAS_ESOP && prot == PROT_READ) - return -EINVAL; - mmap_read_lock(gmap->mm); - rc = gmap_protect_range(gmap, gaddr, len, prot, GMAP_NOTIFY_MPROT); - mmap_read_unlock(gmap->mm); return rc; } -EXPORT_SYMBOL_GPL(gmap_mprotect_notify); +EXPORT_SYMBOL_GPL(gmap_protect_one); /** * gmap_read_table - get an unsigned long value from a guest page table using @@ -1414,7 +1251,6 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ ptdesc = page_ptdesc(phys_to_page(pgt)); - list_del(&ptdesc->pt_list); page_table_free_pgste(ptdesc); } @@ -1442,7 +1278,6 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ ptdesc = page_ptdesc(phys_to_page(pgt)); - list_del(&ptdesc->pt_list); page_table_free_pgste(ptdesc); } } @@ -1472,7 +1307,6 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ page = phys_to_page(sgt); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1500,7 +1334,6 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ page = phys_to_page(sgt); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1530,7 +1363,6 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ page = phys_to_page(r3t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1558,7 +1390,6 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ page = phys_to_page(r3t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1588,7 +1419,6 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Free region 2 table */ page = phys_to_page(r2t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1620,7 +1450,6 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, r1t[i] = _REGION1_ENTRY_EMPTY; /* Free region 2 table */ page = phys_to_page(r2t); - list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } } @@ -1631,7 +1460,7 @@ static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, * * Called with sg->guest_table_lock */ -static void gmap_unshadow(struct gmap *sg) +void gmap_unshadow(struct gmap *sg) { unsigned long *table; @@ -1657,143 +1486,7 @@ static void gmap_unshadow(struct gmap *sg) break; } } - -/** - * gmap_find_shadow - find a specific asce in the list of shadow tables - * @parent: pointer to the parent gmap - * @asce: ASCE for which the shadow table is created - * @edat_level: edat level to be used for the shadow translation - * - * Returns the pointer to a gmap if a shadow table with the given asce is - * already available, ERR_PTR(-EAGAIN) if another one is just being created, - * otherwise NULL - */ -static struct gmap *gmap_find_shadow(struct gmap *parent, unsigned long asce, - int edat_level) -{ - struct gmap *sg; - - list_for_each_entry(sg, &parent->children, list) { - if (sg->orig_asce != asce || sg->edat_level != edat_level || - sg->removed) - continue; - if (!sg->initialized) - return ERR_PTR(-EAGAIN); - refcount_inc(&sg->ref_count); - return sg; - } - return NULL; -} - -/** - * gmap_shadow_valid - check if a shadow guest address space matches the - * given properties and is still valid - * @sg: pointer to the shadow guest address space structure - * @asce: ASCE for which the shadow table is requested - * @edat_level: edat level to be used for the shadow translation - * - * Returns 1 if the gmap shadow is still valid and matches the given - * properties, the caller can continue using it. Returns 0 otherwise, the - * caller has to request a new shadow gmap in this case. - * - */ -int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level) -{ - if (sg->removed) - return 0; - return sg->orig_asce == asce && sg->edat_level == edat_level; -} -EXPORT_SYMBOL_GPL(gmap_shadow_valid); - -/** - * gmap_shadow - create/find a shadow guest address space - * @parent: pointer to the parent gmap - * @asce: ASCE for which the shadow table is created - * @edat_level: edat level to be used for the shadow translation - * - * The pages of the top level page table referred by the asce parameter - * will be set to read-only and marked in the PGSTEs of the kvm process. - * The shadow table will be removed automatically on any change to the - * PTE mapping for the source table. - * - * Returns a guest address space structure, ERR_PTR(-ENOMEM) if out of memory, - * ERR_PTR(-EAGAIN) if the caller has to retry and ERR_PTR(-EFAULT) if the - * parent gmap table could not be protected. - */ -struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, - int edat_level) -{ - struct gmap *sg, *new; - unsigned long limit; - int rc; - - BUG_ON(parent->mm->context.allow_gmap_hpage_1m); - BUG_ON(gmap_is_shadow(parent)); - spin_lock(&parent->shadow_lock); - sg = gmap_find_shadow(parent, asce, edat_level); - spin_unlock(&parent->shadow_lock); - if (sg) - return sg; - /* Create a new shadow gmap */ - limit = -1UL >> (33 - (((asce & _ASCE_TYPE_MASK) >> 2) * 11)); - if (asce & _ASCE_REAL_SPACE) - limit = -1UL; - new = gmap_alloc(limit); - if (!new) - return ERR_PTR(-ENOMEM); - new->mm = parent->mm; - new->parent = gmap_get(parent); - new->private = parent->private; - new->orig_asce = asce; - new->edat_level = edat_level; - new->initialized = false; - spin_lock(&parent->shadow_lock); - /* Recheck if another CPU created the same shadow */ - sg = gmap_find_shadow(parent, asce, edat_level); - if (sg) { - spin_unlock(&parent->shadow_lock); - gmap_free(new); - return sg; - } - if (asce & _ASCE_REAL_SPACE) { - /* only allow one real-space gmap shadow */ - list_for_each_entry(sg, &parent->children, list) { - if (sg->orig_asce & _ASCE_REAL_SPACE) { - spin_lock(&sg->guest_table_lock); - gmap_unshadow(sg); - spin_unlock(&sg->guest_table_lock); - list_del(&sg->list); - gmap_put(sg); - break; - } - } - } - refcount_set(&new->ref_count, 2); - list_add(&new->list, &parent->children); - if (asce & _ASCE_REAL_SPACE) { - /* nothing to protect, return right away */ - new->initialized = true; - spin_unlock(&parent->shadow_lock); - return new; - } - spin_unlock(&parent->shadow_lock); - /* protect after insertion, so it will get properly invalidated */ - mmap_read_lock(parent->mm); - rc = gmap_protect_range(parent, asce & _ASCE_ORIGIN, - ((asce & _ASCE_TABLE_LENGTH) + 1) * PAGE_SIZE, - PROT_READ, GMAP_NOTIFY_SHADOW); - mmap_read_unlock(parent->mm); - spin_lock(&parent->shadow_lock); - new->initialized = true; - if (rc) { - list_del(&new->list); - gmap_free(new); - new = ERR_PTR(rc); - } - spin_unlock(&parent->shadow_lock); - return new; -} -EXPORT_SYMBOL_GPL(gmap_shadow); +EXPORT_SYMBOL(gmap_unshadow); /** * gmap_shadow_r2t - create an empty shadow region 2 table @@ -1827,9 +1520,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = r2t & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_r2t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -1851,7 +1541,6 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r2t & _REGION_ENTRY_PROTECT); - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1911,9 +1600,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = r3t & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_r3t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -1935,7 +1621,6 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r3t & _REGION_ENTRY_PROTECT); - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -1995,9 +1680,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = sgt & _REGION_ENTRY_ORIGIN; - if (fake) - page->index |= GMAP_SHADOW_FAKE_TABLE; s_sgt = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); @@ -2019,7 +1701,6 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= sgt & _REGION_ENTRY_PROTECT; - list_add(&page->lru, &sg->crst_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_REGION_ENTRY_INVALID; @@ -2052,45 +1733,22 @@ out_free: } EXPORT_SYMBOL_GPL(gmap_shadow_sgt); -/** - * gmap_shadow_pgt_lookup - find a shadow page table - * @sg: pointer to the shadow guest address space structure - * @saddr: the address in the shadow aguest address space - * @pgt: parent gmap address of the page table to get shadowed - * @dat_protection: if the pgtable is marked as protected by dat - * @fake: pgt references contiguous guest memory block, not a pgtable - * - * Returns 0 if the shadow page table was found and -EAGAIN if the page - * table was not found. - * - * Called with sg->mm->mmap_lock in read. - */ -int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr, - unsigned long *pgt, int *dat_protection, - int *fake) +static void gmap_pgste_set_pgt_addr(struct ptdesc *ptdesc, unsigned long pgt_addr) { - unsigned long *table; - struct page *page; - int rc; + unsigned long *pgstes = page_to_virt(ptdesc_page(ptdesc)); - BUG_ON(!gmap_is_shadow(sg)); - spin_lock(&sg->guest_table_lock); - table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ - if (table && !(*table & _SEGMENT_ENTRY_INVALID)) { - /* Shadow page tables are full pages (pte+pgste) */ - page = pfn_to_page(*table >> PAGE_SHIFT); - *pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE; - *dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT); - *fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE); - rc = 0; - } else { - rc = -EAGAIN; - } - spin_unlock(&sg->guest_table_lock); - return rc; + pgstes += _PAGE_ENTRIES; + + pgstes[0] &= ~PGSTE_ST2_MASK; + pgstes[1] &= ~PGSTE_ST2_MASK; + pgstes[2] &= ~PGSTE_ST2_MASK; + pgstes[3] &= ~PGSTE_ST2_MASK; + pgstes[0] |= (pgt_addr >> 16) & PGSTE_ST2_MASK; + pgstes[1] |= pgt_addr & PGSTE_ST2_MASK; + pgstes[2] |= (pgt_addr << 16) & PGSTE_ST2_MASK; + pgstes[3] |= (pgt_addr << 32) & PGSTE_ST2_MASK; } -EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup); /** * gmap_shadow_pgt - instantiate a shadow page table @@ -2119,9 +1777,10 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, ptdesc = page_table_alloc_pgste(sg->mm); if (!ptdesc) return -ENOMEM; - ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN; + origin = pgt & _SEGMENT_ENTRY_ORIGIN; if (fake) - ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE; + origin |= GMAP_SHADOW_FAKE_TABLE; + gmap_pgste_set_pgt_addr(ptdesc, origin); s_pgt = page_to_phys(ptdesc_page(ptdesc)); /* Install shadow page table */ spin_lock(&sg->guest_table_lock); @@ -2140,7 +1799,6 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, /* mark as invalid as long as the parent table is not protected */ *table = (unsigned long) s_pgt | _SEGMENT_ENTRY | (pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID; - list_add(&ptdesc->pt_list, &sg->pt_list); if (fake) { /* nothing to protect for fake tables */ *table &= ~_SEGMENT_ENTRY_INVALID; @@ -2318,7 +1976,6 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte, unsigned long bits) { unsigned long offset, gaddr = 0; - unsigned long *table; struct gmap *gmap, *sg, *next; offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); @@ -2326,12 +1983,9 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - table = radix_tree_lookup(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (table) - gaddr = __gmap_segment_gaddr(table) + offset; + gaddr = host_to_guest_lookup(gmap, vmaddr) + offset; spin_unlock(&gmap->guest_table_lock); - if (!table) + if (!IS_GADDR_VALID(gaddr)) continue; if (!list_empty(&gmap->children) && (bits & PGSTE_VSIE_BIT)) { @@ -2391,10 +2045,8 @@ static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr, rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - pmdp = (pmd_t *)radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); if (pmdp) { - gaddr = __gmap_segment_gaddr((unsigned long *)pmdp); pmdp_notify_gmap(gmap, pmdp, gaddr); WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | _SEGMENT_ENTRY_GMAP_UC | @@ -2438,28 +2090,25 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_csp); */ void gmap_pmdp_idte_local(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *entry, gaddr; + unsigned long gaddr; struct gmap *gmap; pmd_t *pmdp; rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (entry) { - pmdp = (pmd_t *)entry; - gaddr = __gmap_segment_gaddr(entry); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC | - _SEGMENT_ENTRY)); + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_LOCAL); else if (MACHINE_HAS_IDTE) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_LOCAL); - *entry = _SEGMENT_ENTRY_EMPTY; + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); } @@ -2474,22 +2123,19 @@ EXPORT_SYMBOL_GPL(gmap_pmdp_idte_local); */ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) { - unsigned long *entry, gaddr; + unsigned long gaddr; struct gmap *gmap; pmd_t *pmdp; rcu_read_lock(); list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (entry) { - pmdp = (pmd_t *)entry; - gaddr = __gmap_segment_gaddr(entry); + pmdp = host_to_guest_pmd_delete(gmap, vmaddr, &gaddr); + if (pmdp) { pmdp_notify_gmap(gmap, pmdp, gaddr); - WARN_ON(*entry & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | - _SEGMENT_ENTRY_GMAP_UC | - _SEGMENT_ENTRY)); + WARN_ON(pmd_val(*pmdp) & ~(_SEGMENT_ENTRY_HARDWARE_BITS_LARGE | + _SEGMENT_ENTRY_GMAP_UC | + _SEGMENT_ENTRY)); if (MACHINE_HAS_TLB_GUEST) __pmdp_idte(gaddr, pmdp, IDTE_GUEST_ASCE, gmap->asce, IDTE_GLOBAL); @@ -2497,7 +2143,7 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr) __pmdp_idte(gaddr, pmdp, 0, 0, IDTE_GLOBAL); else __pmdp_csp(pmdp); - *entry = _SEGMENT_ENTRY_EMPTY; + *pmdp = __pmd(_SEGMENT_ENTRY_EMPTY); } spin_unlock(&gmap->guest_table_lock); } @@ -2943,49 +2589,6 @@ int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start, EXPORT_SYMBOL_GPL(__s390_uv_destroy_range); /** - * s390_unlist_old_asce - Remove the topmost level of page tables from the - * list of page tables of the gmap. - * @gmap: the gmap whose table is to be removed - * - * On s390x, KVM keeps a list of all pages containing the page tables of the - * gmap (the CRST list). This list is used at tear down time to free all - * pages that are now not needed anymore. - * - * This function removes the topmost page of the tree (the one pointed to by - * the ASCE) from the CRST list. - * - * This means that it will not be freed when the VM is torn down, and needs - * to be handled separately by the caller, unless a leak is actually - * intended. Notice that this function will only remove the page from the - * list, the page will still be used as a top level page table (and ASCE). - */ -void s390_unlist_old_asce(struct gmap *gmap) -{ - struct page *old; - - old = virt_to_page(gmap->table); - spin_lock(&gmap->guest_table_lock); - list_del(&old->lru); - /* - * Sometimes the topmost page might need to be "removed" multiple - * times, for example if the VM is rebooted into secure mode several - * times concurrently, or if s390_replace_asce fails after calling - * s390_remove_old_asce and is attempted again later. In that case - * the old asce has been removed from the list, and therefore it - * will not be freed when the VM terminates, but the ASCE is still - * in use and still pointed to. - * A subsequent call to replace_asce will follow the pointer and try - * to remove the same page from the list again. - * Therefore it's necessary that the page of the ASCE has valid - * pointers, so list_del can work (and do nothing) without - * dereferencing stale or invalid pointers. - */ - INIT_LIST_HEAD(&old->lru); - spin_unlock(&gmap->guest_table_lock); -} -EXPORT_SYMBOL_GPL(s390_unlist_old_asce); - -/** * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy * @gmap: the gmap whose ASCE needs to be replaced * @@ -3004,8 +2607,6 @@ int s390_replace_asce(struct gmap *gmap) struct page *page; void *table; - s390_unlist_old_asce(gmap); - /* Replacing segment type ASCEs would cause serious issues */ if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT) return -EINVAL; @@ -3013,19 +2614,9 @@ int s390_replace_asce(struct gmap *gmap) page = gmap_alloc_crst(); if (!page) return -ENOMEM; - page->index = 0; table = page_to_virt(page); memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT)); - /* - * The caller has to deal with the old ASCE, but here we make sure - * the new one is properly added to the CRST list, so that - * it will be freed when the VM is torn down. - */ - spin_lock(&gmap->guest_table_lock); - list_add(&page->lru, &gmap->crst_list); - spin_unlock(&gmap->guest_table_lock); - /* Set new table origin while preserving existing ASCE control bits */ asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table); WRITE_ONCE(gmap->asce, asce); @@ -3035,3 +2626,31 @@ int s390_replace_asce(struct gmap *gmap) return 0; } EXPORT_SYMBOL_GPL(s390_replace_asce); + +/** + * kvm_s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split + * @mm: the mm containing the folio to work on + * @folio: the folio + * @split: whether to split a large folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + */ +int kvm_s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split) +{ + int rc; + + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); + if (split) { + folio_lock(folio); + rc = split_folio(folio); + folio_unlock(folio); + + if (rc != -EBUSY) + return rc; + } + return -EAGAIN; +} +EXPORT_SYMBOL_GPL(kvm_s390_wiggle_split_folio); diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index d9ce199953de..2e568f175cd4 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -188,8 +188,8 @@ pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep) return __rste_to_pte(pte_val(*ptep)); } -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) +pte_t __huge_ptep_get_and_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get(mm, addr, ptep); pmd_t *pmdp = (pmd_t *) ptep; diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index cd2fef79ad2c..30387a6e98ff 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -176,8 +176,6 @@ unsigned long *page_table_alloc(struct mm_struct *mm) } table = ptdesc_to_virt(ptdesc); __arch_set_page_dat(table, 1); - /* pt_list is used by gmap only */ - INIT_LIST_HEAD(&ptdesc->pt_list); memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE); memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE); return table; diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 857afbc4828f..39a481ec4a40 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -331,6 +331,17 @@ error: return rc; } +static bool zpci_bus_is_isolated_vf(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + struct pci_dev *pdev; + + pdev = zpci_iov_find_parent_pf(zbus, zdev); + if (!pdev) + return true; + pci_dev_put(pdev); + return false; +} + int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) { bool topo_is_tid = zdev->tid_avail; @@ -345,6 +356,15 @@ int zpci_bus_device_register(struct zpci_dev *zdev, struct pci_ops *ops) topo = topo_is_tid ? zdev->tid : zdev->pchid; zbus = zpci_bus_get(topo, topo_is_tid); + /* + * An isolated VF gets its own domain/bus even if there exists + * a matching domain/bus already + */ + if (zbus && zpci_bus_is_isolated_vf(zbus, zdev)) { + zpci_bus_put(zbus); + zbus = NULL; + } + if (!zbus) { zbus = zpci_bus_alloc(topo, topo_is_tid); if (!zbus) diff --git a/arch/s390/pci/pci_iov.c b/arch/s390/pci/pci_iov.c index ead062bf2b41..191e56a623f6 100644 --- a/arch/s390/pci/pci_iov.c +++ b/arch/s390/pci/pci_iov.c @@ -60,18 +60,35 @@ static int zpci_iov_link_virtfn(struct pci_dev *pdev, struct pci_dev *virtfn, in return 0; } -int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +/** + * zpci_iov_find_parent_pf - Find the parent PF, if any, of the given function + * @zbus: The bus that the PCI function is on, or would be added on + * @zdev: The PCI function + * + * Finds the parent PF, if it exists and is configured, of the given PCI function + * and increments its refcount. Th PF is searched for on the provided bus so the + * caller has to ensure that this is the correct bus to search. This function may + * be used before adding the PCI function to a zbus. + * + * Return: Pointer to the struct pci_dev of the parent PF or NULL if it not + * found. If the function is not a VF or has no RequesterID information, + * NULL is returned as well. + */ +struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) { - int i, cand_devfn; - struct zpci_dev *zdev; + int i, vfid, devfn, cand_devfn; struct pci_dev *pdev; - int vfid = vfn - 1; /* Linux' vfid's start at 0 vfn at 1*/ - int rc = 0; if (!zbus->multifunction) - return 0; - - /* If the parent PF for the given VF is also configured in the + return NULL; + /* Non-VFs and VFs without RID available don't have a parent */ + if (!zdev->vfn || !zdev->rid_available) + return NULL; + /* Linux vfid starts at 0 vfn at 1 */ + vfid = zdev->vfn - 1; + devfn = zdev->rid & ZPCI_RID_MASK_DEVFN; + /* + * If the parent PF for the given VF is also configured in the * instance, it must be on the same zbus. * We can then identify the parent PF by checking what * devfn the VF would have if it belonged to that PF using the PF's @@ -85,15 +102,26 @@ int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn if (!pdev) continue; cand_devfn = pci_iov_virtfn_devfn(pdev, vfid); - if (cand_devfn == virtfn->devfn) { - rc = zpci_iov_link_virtfn(pdev, virtfn, vfid); - /* balance pci_get_slot() */ - pci_dev_put(pdev); - break; - } + if (cand_devfn == devfn) + return pdev; /* balance pci_get_slot() */ pci_dev_put(pdev); } } + return NULL; +} + +int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn) +{ + struct zpci_dev *zdev = to_zpci(virtfn); + struct pci_dev *pdev_pf; + int rc = 0; + + pdev_pf = zpci_iov_find_parent_pf(zbus, zdev); + if (pdev_pf) { + /* Linux' vfids start at 0 while zdev->vfn starts at 1 */ + rc = zpci_iov_link_virtfn(pdev_pf, virtfn, zdev->vfn - 1); + pci_dev_put(pdev_pf); + } return rc; } diff --git a/arch/s390/pci/pci_iov.h b/arch/s390/pci/pci_iov.h index e3fa4e77fc86..d2c2793eb0f3 100644 --- a/arch/s390/pci/pci_iov.h +++ b/arch/s390/pci/pci_iov.h @@ -19,6 +19,8 @@ void zpci_iov_map_resources(struct pci_dev *pdev); int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *virtfn, int vfn); +struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev); + #else /* CONFIG_PCI_IOV */ static inline void zpci_iov_remove_virtfn(struct pci_dev *pdev, int vfn) {} @@ -28,5 +30,10 @@ static inline int zpci_iov_setup_virtfn(struct zpci_bus *zbus, struct pci_dev *v { return 0; } + +static inline struct pci_dev *zpci_iov_find_parent_pf(struct zpci_bus *zbus, struct zpci_dev *zdev) +{ + return NULL; +} #endif /* CONFIG_PCI_IOV */ #endif /* __S390_PCI_IOV_h */ diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index bdcf2a3b6c41..bd39b36e7bd6 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -8,7 +8,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE $(call if_changed_rule,cc_o_c) -CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY +CFLAGS_sha256.o := -D__NO_FORTIFY $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE $(call if_changed_rule,as_o_S) @@ -19,9 +19,11 @@ KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding KBUILD_CFLAGS += -Os -m64 -msoft-float -fno-common KBUILD_CFLAGS += -fno-stack-protector KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS += -D__DISABLE_EXPORTS KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_CFLAGS += $(call cc-option,-fno-PIE) KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS)) +KBUILD_AFLAGS += -D__DISABLE_EXPORTS # Since we link purgatory with -r unresolved symbols are not checked, so we # also link a purgatory.chk binary without -r to check for unresolved symbols. diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h index c714ca6a05aa..e7a9cdd498dc 100644 --- a/arch/sparc/include/asm/hugetlb.h +++ b/arch/sparc/include/asm/hugetlb.h @@ -20,7 +20,7 @@ void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep); + pte_t *ptep, unsigned long sz); #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index eee601a0d2cf..80504148d8a5 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -260,7 +260,7 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) + pte_t *ptep, unsigned long sz) { unsigned int i, nptes, orig_shift, shift; unsigned long size; diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index 744e7f31e8ef..dd5580f975cc 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -25,8 +25,10 @@ #define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32)) #define NUM_IRQ_MSGS 10 -#define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1)) -#define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1) +struct um_pci_message_buffer { + struct virtio_pcidev_msg hdr; + u8 data[8]; +}; struct um_pci_device { struct virtio_device *vdev; @@ -36,6 +38,11 @@ struct um_pci_device { struct virtqueue *cmd_vq, *irq_vq; +#define UM_PCI_WRITE_BUFS 20 + struct um_pci_message_buffer bufs[UM_PCI_WRITE_BUFS + 1]; + void *extra_ptrs[UM_PCI_WRITE_BUFS + 1]; + DECLARE_BITMAP(used_bufs, UM_PCI_WRITE_BUFS); + #define UM_PCI_STAT_WAITING 0 unsigned long status; @@ -61,12 +68,40 @@ static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)]; static unsigned int um_pci_max_delay_us = 40000; module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644); -struct um_pci_message_buffer { - struct virtio_pcidev_msg hdr; - u8 data[8]; -}; +static int um_pci_get_buf(struct um_pci_device *dev, bool *posted) +{ + int i; + + for (i = 0; i < UM_PCI_WRITE_BUFS; i++) { + if (!test_and_set_bit(i, dev->used_bufs)) + return i; + } -static struct um_pci_message_buffer __percpu *um_pci_msg_bufs; + *posted = false; + return UM_PCI_WRITE_BUFS; +} + +static void um_pci_free_buf(struct um_pci_device *dev, void *buf) +{ + int i; + + if (buf == &dev->bufs[UM_PCI_WRITE_BUFS]) { + kfree(dev->extra_ptrs[UM_PCI_WRITE_BUFS]); + dev->extra_ptrs[UM_PCI_WRITE_BUFS] = NULL; + return; + } + + for (i = 0; i < UM_PCI_WRITE_BUFS; i++) { + if (buf == &dev->bufs[i]) { + kfree(dev->extra_ptrs[i]); + dev->extra_ptrs[i] = NULL; + WARN_ON(!test_and_clear_bit(i, dev->used_bufs)); + return; + } + } + + WARN_ON(1); +} static int um_pci_send_cmd(struct um_pci_device *dev, struct virtio_pcidev_msg *cmd, @@ -82,7 +117,9 @@ static int um_pci_send_cmd(struct um_pci_device *dev, }; struct um_pci_message_buffer *buf; int delay_count = 0; + bool bounce_out; int ret, len; + int buf_idx; bool posted; if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf))) @@ -101,26 +138,28 @@ static int um_pci_send_cmd(struct um_pci_device *dev, break; } - buf = get_cpu_var(um_pci_msg_bufs); - if (buf) - memcpy(buf, cmd, cmd_size); + bounce_out = !posted && cmd_size <= sizeof(*cmd) && + out && out_size <= sizeof(buf->data); - if (posted) { - u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); - - if (ncmd) { - memcpy(ncmd, cmd, cmd_size); - if (extra) - memcpy(ncmd + cmd_size, extra, extra_size); - cmd = (void *)ncmd; - cmd_size += extra_size; - extra = NULL; - extra_size = 0; - } else { - /* try without allocating memory */ - posted = false; - cmd = (void *)buf; + buf_idx = um_pci_get_buf(dev, &posted); + buf = &dev->bufs[buf_idx]; + memcpy(buf, cmd, cmd_size); + + if (posted && extra && extra_size > sizeof(buf) - cmd_size) { + dev->extra_ptrs[buf_idx] = kmemdup(extra, extra_size, + GFP_ATOMIC); + + if (!dev->extra_ptrs[buf_idx]) { + um_pci_free_buf(dev, buf); + return -ENOMEM; } + extra = dev->extra_ptrs[buf_idx]; + } else if (extra && extra_size <= sizeof(buf) - cmd_size) { + memcpy((u8 *)buf + cmd_size, extra, extra_size); + cmd_size += extra_size; + extra_size = 0; + extra = NULL; + cmd = (void *)buf; } else { cmd = (void *)buf; } @@ -128,39 +167,40 @@ static int um_pci_send_cmd(struct um_pci_device *dev, sg_init_one(&out_sg, cmd, cmd_size); if (extra) sg_init_one(&extra_sg, extra, extra_size); - if (out) + /* allow stack for small buffers */ + if (bounce_out) + sg_init_one(&in_sg, buf->data, out_size); + else if (out) sg_init_one(&in_sg, out, out_size); /* add to internal virtio queue */ ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list, extra ? 2 : 1, out ? 1 : 0, - posted ? cmd : HANDLE_NO_FREE(cmd), - GFP_ATOMIC); + cmd, GFP_ATOMIC); if (ret) { - if (posted) - kfree(cmd); - goto out; + um_pci_free_buf(dev, buf); + return ret; } if (posted) { virtqueue_kick(dev->cmd_vq); - ret = 0; - goto out; + return 0; } /* kick and poll for getting a response on the queue */ set_bit(UM_PCI_STAT_WAITING, &dev->status); virtqueue_kick(dev->cmd_vq); + ret = 0; while (1) { void *completed = virtqueue_get_buf(dev->cmd_vq, &len); - if (completed == HANDLE_NO_FREE(cmd)) + if (completed == buf) break; - if (completed && !HANDLE_IS_NO_FREE(completed)) - kfree(completed); + if (completed) + um_pci_free_buf(dev, completed); if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) || ++delay_count > um_pci_max_delay_us, @@ -172,8 +212,11 @@ static int um_pci_send_cmd(struct um_pci_device *dev, } clear_bit(UM_PCI_STAT_WAITING, &dev->status); -out: - put_cpu_var(um_pci_msg_bufs); + if (bounce_out) + memcpy(out, buf->data, out_size); + + um_pci_free_buf(dev, buf); + return ret; } @@ -187,20 +230,13 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, .size = size, .addr = offset, }; - /* buf->data is maximum size - we may only use parts of it */ - struct um_pci_message_buffer *buf; - u8 *data; - unsigned long ret = ULONG_MAX; - size_t bytes = sizeof(buf->data); + /* max 8, we might not use it all */ + u8 data[8]; if (!dev) return ULONG_MAX; - buf = get_cpu_var(um_pci_msg_bufs); - data = buf->data; - - if (buf) - memset(data, 0xff, bytes); + memset(data, 0xff, sizeof(data)); switch (size) { case 1: @@ -212,34 +248,26 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, break; default: WARN(1, "invalid config space read size %d\n", size); - goto out; + return ULONG_MAX; } - if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes)) - goto out; + if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, size)) + return ULONG_MAX; switch (size) { case 1: - ret = data[0]; - break; + return data[0]; case 2: - ret = le16_to_cpup((void *)data); - break; + return le16_to_cpup((void *)data); case 4: - ret = le32_to_cpup((void *)data); - break; + return le32_to_cpup((void *)data); #ifdef CONFIG_64BIT case 8: - ret = le64_to_cpup((void *)data); - break; + return le64_to_cpup((void *)data); #endif default: - break; + return ULONG_MAX; } - -out: - put_cpu_var(um_pci_msg_bufs); - return ret; } static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size, @@ -312,13 +340,8 @@ static void um_pci_bar_copy_from(void *priv, void *buffer, static unsigned long um_pci_bar_read(void *priv, unsigned int offset, int size) { - /* buf->data is maximum size - we may only use parts of it */ - struct um_pci_message_buffer *buf; - u8 *data; - unsigned long ret = ULONG_MAX; - - buf = get_cpu_var(um_pci_msg_bufs); - data = buf->data; + /* 8 is maximum size - we may only use parts of it */ + u8 data[8]; switch (size) { case 1: @@ -330,33 +353,25 @@ static unsigned long um_pci_bar_read(void *priv, unsigned int offset, break; default: WARN(1, "invalid config space read size %d\n", size); - goto out; + return ULONG_MAX; } um_pci_bar_copy_from(priv, data, offset, size); switch (size) { case 1: - ret = data[0]; - break; + return data[0]; case 2: - ret = le16_to_cpup((void *)data); - break; + return le16_to_cpup((void *)data); case 4: - ret = le32_to_cpup((void *)data); - break; + return le32_to_cpup((void *)data); #ifdef CONFIG_64BIT case 8: - ret = le64_to_cpup((void *)data); - break; + return le64_to_cpup((void *)data); #endif default: - break; + return ULONG_MAX; } - -out: - put_cpu_var(um_pci_msg_bufs); - return ret; } static void um_pci_bar_copy_to(void *priv, unsigned int offset, @@ -523,11 +538,8 @@ static void um_pci_cmd_vq_cb(struct virtqueue *vq) if (test_bit(UM_PCI_STAT_WAITING, &dev->status)) return; - while ((cmd = virtqueue_get_buf(vq, &len))) { - if (WARN_ON(HANDLE_IS_NO_FREE(cmd))) - continue; - kfree(cmd); - } + while ((cmd = virtqueue_get_buf(vq, &len))) + um_pci_free_buf(dev, cmd); } static void um_pci_irq_vq_cb(struct virtqueue *vq) @@ -1006,10 +1018,6 @@ static int __init um_pci_init(void) "No virtio device ID configured for PCI - no PCI support\n")) return 0; - um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer); - if (!um_pci_msg_bufs) - return -ENOMEM; - bridge = pci_alloc_host_bridge(0); if (!bridge) { err = -ENOMEM; @@ -1070,7 +1078,6 @@ free: pci_free_resource_list(&bridge->windows); pci_free_host_bridge(bridge); } - free_percpu(um_pci_msg_bufs); return err; } module_init(um_pci_init); @@ -1082,6 +1089,5 @@ static void __exit um_pci_exit(void) irq_domain_remove(um_pci_inner_domain); pci_free_resource_list(&bridge->windows); pci_free_host_bridge(bridge); - free_percpu(um_pci_msg_bufs); } module_exit(um_pci_exit); diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index 65df43fa9be5..ad8d78fb1d9a 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -52,7 +52,7 @@ struct virtio_uml_device { struct platform_device *pdev; struct virtio_uml_platform_data *pdata; - spinlock_t sock_lock; + raw_spinlock_t sock_lock; int sock, req_fd, irq; u64 features; u64 protocol_features; @@ -246,7 +246,7 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev, if (request_ack) msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY; - spin_lock_irqsave(&vu_dev->sock_lock, flags); + raw_spin_lock_irqsave(&vu_dev->sock_lock, flags); rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds); if (rc < 0) goto out; @@ -266,7 +266,7 @@ static int vhost_user_send(struct virtio_uml_device *vu_dev, } out: - spin_unlock_irqrestore(&vu_dev->sock_lock, flags); + raw_spin_unlock_irqrestore(&vu_dev->sock_lock, flags); return rc; } @@ -1239,7 +1239,7 @@ static int virtio_uml_probe(struct platform_device *pdev) goto error_free; vu_dev->sock = rc; - spin_lock_init(&vu_dev->sock_lock); + raw_spin_lock_init(&vu_dev->sock_lock); rc = vhost_user_init(vu_dev); if (rc) diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 338450741aac..a4991746f5ea 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -52,7 +52,7 @@ struct irq_entry { bool sigio_workaround; }; -static DEFINE_SPINLOCK(irq_lock); +static DEFINE_RAW_SPINLOCK(irq_lock); static LIST_HEAD(active_fds); static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ); static bool irqs_suspended; @@ -257,7 +257,7 @@ static struct irq_entry *get_irq_entry_by_fd(int fd) return NULL; } -static void free_irq_entry(struct irq_entry *to_free, bool remove) +static void remove_irq_entry(struct irq_entry *to_free, bool remove) { if (!to_free) return; @@ -265,7 +265,6 @@ static void free_irq_entry(struct irq_entry *to_free, bool remove) if (remove) os_del_epoll_fd(to_free->fd); list_del(&to_free->list); - kfree(to_free); } static bool update_irq_entry(struct irq_entry *entry) @@ -286,17 +285,19 @@ static bool update_irq_entry(struct irq_entry *entry) return false; } -static void update_or_free_irq_entry(struct irq_entry *entry) +static struct irq_entry *update_or_remove_irq_entry(struct irq_entry *entry) { - if (!update_irq_entry(entry)) - free_irq_entry(entry, false); + if (update_irq_entry(entry)) + return NULL; + remove_irq_entry(entry, false); + return entry; } static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, void (*timetravel_handler)(int, int, void *, struct time_travel_event *)) { - struct irq_entry *irq_entry; + struct irq_entry *irq_entry, *to_free = NULL; int err, events = os_event_mask(type); unsigned long flags; @@ -304,9 +305,10 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, if (err < 0) goto out; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); irq_entry = get_irq_entry_by_fd(fd); if (irq_entry) { +already: /* cannot register the same FD twice with the same type */ if (WARN_ON(irq_entry->reg[type].events)) { err = -EALREADY; @@ -316,11 +318,22 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, /* temporarily disable to avoid IRQ-side locking */ os_del_epoll_fd(fd); } else { - irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC); - if (!irq_entry) { - err = -ENOMEM; - goto out_unlock; + struct irq_entry *new; + + /* don't restore interrupts */ + raw_spin_unlock(&irq_lock); + new = kzalloc(sizeof(*irq_entry), GFP_ATOMIC); + if (!new) { + local_irq_restore(flags); + return -ENOMEM; } + raw_spin_lock(&irq_lock); + irq_entry = get_irq_entry_by_fd(fd); + if (irq_entry) { + to_free = new; + goto already; + } + irq_entry = new; irq_entry->fd = fd; list_add_tail(&irq_entry->list, &active_fds); maybe_sigio_broken(fd); @@ -339,12 +352,11 @@ static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id, #endif WARN_ON(!update_irq_entry(irq_entry)); - spin_unlock_irqrestore(&irq_lock, flags); - - return 0; + err = 0; out_unlock: - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); out: + kfree(to_free); return err; } @@ -358,19 +370,20 @@ void free_irq_by_fd(int fd) struct irq_entry *to_free; unsigned long flags; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); to_free = get_irq_entry_by_fd(fd); - free_irq_entry(to_free, true); - spin_unlock_irqrestore(&irq_lock, flags); + remove_irq_entry(to_free, true); + raw_spin_unlock_irqrestore(&irq_lock, flags); + kfree(to_free); } EXPORT_SYMBOL(free_irq_by_fd); static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) { - struct irq_entry *entry; + struct irq_entry *entry, *to_free = NULL; unsigned long flags; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { enum um_irq_type i; @@ -386,12 +399,13 @@ static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) os_del_epoll_fd(entry->fd); reg->events = 0; - update_or_free_irq_entry(entry); + to_free = update_or_remove_irq_entry(entry); goto out; } } out: - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); + kfree(to_free); } void deactivate_fd(int fd, int irqnum) @@ -402,7 +416,7 @@ void deactivate_fd(int fd, int irqnum) os_del_epoll_fd(fd); - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); entry = get_irq_entry_by_fd(fd); if (!entry) goto out; @@ -414,9 +428,10 @@ void deactivate_fd(int fd, int irqnum) entry->reg[i].events = 0; } - update_or_free_irq_entry(entry); + entry = update_or_remove_irq_entry(entry); out: - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); + kfree(entry); ignore_sigio_fd(fd); } @@ -546,7 +561,7 @@ void um_irqs_suspend(void) irqs_suspended = true; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { enum um_irq_type t; bool clear = true; @@ -579,7 +594,7 @@ void um_irqs_suspend(void) !__ignore_sigio_fd(entry->fd); } } - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); } void um_irqs_resume(void) @@ -588,7 +603,7 @@ void um_irqs_resume(void) unsigned long flags; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { if (entry->suspended) { int err = os_set_fd_async(entry->fd); @@ -602,7 +617,7 @@ void um_irqs_resume(void) } } } - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); irqs_suspended = false; send_sigio_to_self(); @@ -613,7 +628,7 @@ static int normal_irq_set_wake(struct irq_data *d, unsigned int on) struct irq_entry *entry; unsigned long flags; - spin_lock_irqsave(&irq_lock, flags); + raw_spin_lock_irqsave(&irq_lock, flags); list_for_each_entry(entry, &active_fds, list) { enum um_irq_type t; @@ -628,7 +643,7 @@ static int normal_irq_set_wake(struct irq_data *d, unsigned int on) } } unlock: - spin_unlock_irqrestore(&irq_lock, flags); + raw_spin_unlock_irqrestore(&irq_lock, flags); return 0; } #else diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index e5a2d4d897e0..0cd6fad3d908 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -191,7 +191,15 @@ void initial_thread_cb(void (*proc)(void *), void *arg) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - memcpy(dst, src, arch_task_struct_size); + /* init_task is not dynamically sized (missing FPU state) */ + if (unlikely(src == &init_task)) { + memcpy(dst, src, sizeof(init_task)); + memset((void *)dst + sizeof(init_task), 0, + arch_task_struct_size - sizeof(init_task)); + } else { + memcpy(dst, src, arch_task_struct_size); + } + return 0; } diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index f683cfc9e51a..e2f8f156402f 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -181,6 +181,10 @@ extern char __syscall_stub_start[]; static int stub_exe_fd; +#ifndef CLOSE_RANGE_CLOEXEC +#define CLOSE_RANGE_CLOEXEC (1U << 2) +#endif + static int userspace_tramp(void *stack) { char *const argv[] = { "uml-userspace", NULL }; @@ -202,8 +206,12 @@ static int userspace_tramp(void *stack) init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset); init_data.stub_data_offset = MMAP_OFFSET(offset); - /* Set CLOEXEC on all FDs and then unset on all memory related FDs */ - close_range(0, ~0U, CLOSE_RANGE_CLOEXEC); + /* + * Avoid leaking unneeded FDs to the stub by setting CLOEXEC on all FDs + * and then unsetting it on all memory related FDs. + * This is not strictly necessary from a safety perspective. + */ + syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC); fcntl(init_data.stub_data_fd, F_SETFD, 0); for (iomem = iomem_regions; iomem; iomem = iomem->next) @@ -224,7 +232,9 @@ static int userspace_tramp(void *stack) if (ret != sizeof(init_data)) exit(4); - execveat(stub_exe_fd, "", argv, NULL, AT_EMPTY_PATH); + /* Raw execveat for compatibility with older libc versions */ + syscall(__NR_execveat, stub_exe_fd, (unsigned long)"", + (unsigned long)argv, NULL, AT_EMPTY_PATH); exit(5); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 87198d957e2f..be2c311f5118 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2599,7 +2599,8 @@ config MITIGATION_IBPB_ENTRY depends on CPU_SUP_AMD && X86_64 default y help - Compile the kernel with support for the retbleed=ibpb mitigation. + Compile the kernel with support for the retbleed=ibpb and + spec_rstack_overflow={ibpb,ibpb-vmexit} mitigations. config MITIGATION_IBRS_ENTRY bool "Enable IBRS on kernel entry" diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index f2051644de94..606c74f27459 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -25,6 +25,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ # avoid errors with '-march=i386', and future flags may depend on the target to # be valid. KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS) +KBUILD_CFLAGS += -std=gnu11 KBUILD_CFLAGS += -fno-strict-aliasing -fPIE KBUILD_CFLAGS += -Wundef KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 94941c5a10ac..14db5b85114c 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -190,6 +190,7 @@ static __always_inline bool int80_is_external(void) /** * do_int80_emulation - 32-bit legacy syscall C entry from asm + * @regs: syscall arguments in struct pt_args on the stack. * * This entry point can be used by 32-bit and 64-bit programs to perform * 32-bit system calls. Instances of INT $0x80 can be found inline in diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 8f218ac0d445..2092d615333d 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -628,7 +628,7 @@ int x86_pmu_hw_config(struct perf_event *event) if (event->attr.type == event->pmu->type) event->hw.config |= x86_pmu_get_event_config(event); - if (event->attr.sample_period && x86_pmu.limit_period) { + if (!event->attr.freq && x86_pmu.limit_period) { s64 left = event->attr.sample_period; x86_pmu.limit_period(event, &left); if (left > event->attr.sample_period) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7601196d1d18..cdb19e3ba3aa 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -397,34 +397,28 @@ static struct event_constraint intel_lnc_event_constraints[] = { METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_FETCH_LAT, 6), METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_MEM_BOUND, 7), + INTEL_EVENT_CONSTRAINT(0x20, 0xf), + + INTEL_UEVENT_CONSTRAINT(0x012a, 0xf), + INTEL_UEVENT_CONSTRAINT(0x012b, 0xf), INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), INTEL_UEVENT_CONSTRAINT(0x0175, 0x4), INTEL_EVENT_CONSTRAINT(0x2e, 0x3ff), INTEL_EVENT_CONSTRAINT(0x3c, 0x3ff), - /* - * Generally event codes < 0x90 are restricted to counters 0-3. - * The 0x2E and 0x3C are exception, which has no restriction. - */ - INTEL_EVENT_CONSTRAINT_RANGE(0x01, 0x8f, 0xf), - INTEL_UEVENT_CONSTRAINT(0x01a3, 0xf), - INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), INTEL_UEVENT_CONSTRAINT(0x04a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x08a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x10a4, 0x1), INTEL_UEVENT_CONSTRAINT(0x01b1, 0x8), + INTEL_UEVENT_CONSTRAINT(0x01cd, 0x3fc), INTEL_UEVENT_CONSTRAINT(0x02cd, 0x3), - INTEL_EVENT_CONSTRAINT(0xce, 0x1), INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xdf, 0xf), - /* - * Generally event codes >= 0x90 are likely to have no restrictions. - * The exception are defined as above. - */ - INTEL_EVENT_CONSTRAINT_RANGE(0x90, 0xfe, 0x3ff), + + INTEL_UEVENT_CONSTRAINT(0x00e0, 0xf), EVENT_CONSTRAINT_END }; @@ -3958,6 +3952,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx) return test_bit(idx, (unsigned long *)&intel_cap->capabilities); } +static u64 intel_pmu_freq_start_period(struct perf_event *event) +{ + int type = event->attr.type; + u64 config, factor; + s64 start; + + /* + * The 127 is the lowest possible recommended SAV (sample after value) + * for a 4000 freq (default freq), according to the event list JSON file. + * Also, assume the workload is idle 50% time. + */ + factor = 64 * 4000; + if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE) + goto end; + + /* + * The estimation of the start period in the freq mode is + * based on the below assumption. + * + * For a cycles or an instructions event, 1GHZ of the + * underlying platform, 1 IPC. The workload is idle 50% time. + * The start period = 1,000,000,000 * 1 / freq / 2. + * = 500,000,000 / freq + * + * Usually, the branch-related events occur less than the + * instructions event. According to the Intel event list JSON + * file, the SAV (sample after value) of a branch-related event + * is usually 1/4 of an instruction event. + * The start period of branch-related events = 125,000,000 / freq. + * + * The cache-related events occurs even less. The SAV is usually + * 1/20 of an instruction event. + * The start period of cache-related events = 25,000,000 / freq. + */ + config = event->attr.config & PERF_HW_EVENT_MASK; + if (type == PERF_TYPE_HARDWARE) { + switch (config) { + case PERF_COUNT_HW_CPU_CYCLES: + case PERF_COUNT_HW_INSTRUCTIONS: + case PERF_COUNT_HW_BUS_CYCLES: + case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND: + case PERF_COUNT_HW_STALLED_CYCLES_BACKEND: + case PERF_COUNT_HW_REF_CPU_CYCLES: + factor = 500000000; + break; + case PERF_COUNT_HW_BRANCH_INSTRUCTIONS: + case PERF_COUNT_HW_BRANCH_MISSES: + factor = 125000000; + break; + case PERF_COUNT_HW_CACHE_REFERENCES: + case PERF_COUNT_HW_CACHE_MISSES: + factor = 25000000; + break; + default: + goto end; + } + } + + if (type == PERF_TYPE_HW_CACHE) + factor = 25000000; +end: + /* + * Usually, a prime or a number with less factors (close to prime) + * is chosen as an SAV, which makes it less likely that the sampling + * period synchronizes with some periodic event in the workload. + * Minus 1 to make it at least avoiding values near power of twos + * for the default freq. + */ + start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1; + + if (start > x86_pmu.max_period) + start = x86_pmu.max_period; + + if (x86_pmu.limit_period) + x86_pmu.limit_period(event, &start); + + return start; +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -3969,6 +4042,12 @@ static int intel_pmu_hw_config(struct perf_event *event) if (ret) return ret; + if (event->attr.freq && event->attr.sample_freq) { + event->hw.sample_period = intel_pmu_freq_start_period(event); + event->hw.last_period = event->hw.sample_period; + local64_set(&event->hw.period_left, event->hw.sample_period); + } + if (event->attr.precise_ip) { if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) return -EINVAL; @@ -4905,20 +4984,22 @@ static inline bool intel_pmu_broken_perf_cap(void) static void update_pmu_cap(struct x86_hybrid_pmu *pmu) { - unsigned int sub_bitmaps, eax, ebx, ecx, edx; + unsigned int cntr, fixed_cntr, ecx, edx; + union cpuid35_eax eax; + union cpuid35_ebx ebx; - cpuid(ARCH_PERFMON_EXT_LEAF, &sub_bitmaps, &ebx, &ecx, &edx); + cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx); - if (ebx & ARCH_PERFMON_EXT_UMASK2) + if (ebx.split.umask2) pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2; - if (ebx & ARCH_PERFMON_EXT_EQ) + if (ebx.split.eq) pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ; - if (sub_bitmaps & ARCH_PERFMON_NUM_COUNTER_LEAF_BIT) { + if (eax.split.cntr_subleaf) { cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF, - &eax, &ebx, &ecx, &edx); - pmu->cntr_mask64 = eax; - pmu->fixed_cntr_mask64 = ebx; + &cntr, &fixed_cntr, &ecx, &edx); + pmu->cntr_mask64 = cntr; + pmu->fixed_cntr_mask64 = fixed_cntr; } if (!intel_pmu_broken_perf_cap()) { @@ -4941,11 +5022,6 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) else pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); - if (pmu->intel_cap.pebs_output_pt_available) - pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; - else - pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; - intel_pmu_check_event_constraints(pmu->event_constraints, pmu->cntr_mask64, pmu->fixed_cntr_mask64, @@ -5023,9 +5099,6 @@ static bool init_hybrid_pmu(int cpu) pr_info("%s PMU driver: ", pmu->name); - if (pmu->intel_cap.pebs_output_pt_available) - pr_cont("PEBS-via-PT "); - pr_cont("\n"); x86_pmu_show_pmu_cap(&pmu->pmu); @@ -5048,8 +5121,11 @@ static void intel_pmu_cpu_starting(int cpu) init_debug_store_on_cpu(cpu); /* - * Deal with CPUs that don't clear their LBRs on power-up. + * Deal with CPUs that don't clear their LBRs on power-up, and that may + * even boot with LBRs enabled. */ + if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && x86_pmu.lbr_nr) + msr_clear_bit(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR_BIT); intel_pmu_lbr_reset(); cpuc->lbr_sel = NULL; @@ -6370,11 +6446,9 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; if (pmu->pmu_type & hybrid_small_tiny) { pmu->intel_cap.perf_metrics = 0; - pmu->intel_cap.pebs_output_pt_available = 1; pmu->mid_ack = true; } else if (pmu->pmu_type & hybrid_big) { pmu->intel_cap.perf_metrics = 1; - pmu->intel_cap.pebs_output_pt_available = 0; pmu->late_ack = true; } } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index ba74e1198328..f122882ef278 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1199,7 +1199,7 @@ struct event_constraint intel_lnc_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), - INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff), + INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3fc), INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3), INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */ @@ -2578,7 +2578,15 @@ void __init intel_ds_init(void) } pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual); - if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) { + /* + * The PEBS-via-PT is not supported on hybrid platforms, + * because not all CPUs of a hybrid machine support it. + * The global x86_pmu.intel_cap, which only contains the + * common capabilities, is used to check the availability + * of the feature. The per-PMU pebs_output_pt_available + * in a hybrid machine should be ignored. + */ + if (x86_pmu.intel_cap.pebs_output_pt_available) { pr_cont("PEBS-via-PT, "); x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; } diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index d3bb3865c1b1..6941f4811bec 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -370,6 +370,10 @@ static int rapl_pmu_event_init(struct perf_event *event) unsigned int rapl_pmu_idx; struct rapl_pmus *rapl_pmus; + /* only look at RAPL events */ + if (event->attr.type != event->pmu->type) + return -ENOENT; + /* unsupported modes and filters */ if (event->attr.sample_period) /* no sampling */ return -EINVAL; @@ -387,10 +391,6 @@ static int rapl_pmu_event_init(struct perf_event *event) rapl_pmus_scope = rapl_pmus->pmu.scope; if (rapl_pmus_scope == PERF_PMU_SCOPE_PKG || rapl_pmus_scope == PERF_PMU_SCOPE_DIE) { - /* only look at RAPL package events */ - if (event->attr.type != rapl_pmus_pkg->pmu.type) - return -ENOENT; - cfg = array_index_nospec((long)cfg, NR_RAPL_PKG_DOMAINS + 1); if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) return -EINVAL; @@ -398,10 +398,6 @@ static int rapl_pmu_event_init(struct perf_event *event) bit = cfg - 1; event->hw.event_base = rapl_model->rapl_pkg_msrs[bit].msr; } else if (rapl_pmus_scope == PERF_PMU_SCOPE_CORE) { - /* only look at RAPL core events */ - if (event->attr.type != rapl_pmus_core->pmu.type) - return -ENOENT; - cfg = array_index_nospec((long)cfg, NR_RAPL_CORE_DOMAINS + 1); if (!cfg || cfg >= NR_RAPL_PKG_DOMAINS + 1) return -EINVAL; @@ -883,6 +879,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_VFM(INTEL_METEORLAKE_L, &model_skl), X86_MATCH_VFM(INTEL_ARROWLAKE_H, &model_skl), X86_MATCH_VFM(INTEL_ARROWLAKE, &model_skl), + X86_MATCH_VFM(INTEL_ARROWLAKE_U, &model_skl), X86_MATCH_VFM(INTEL_LUNARLAKE_M, &model_skl), {}, }; diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index c35550581da0..823c0434bbad 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -48,6 +48,7 @@ KVM_X86_OP(set_idt) KVM_X86_OP(get_gdt) KVM_X86_OP(set_gdt) KVM_X86_OP(sync_dirty_debug_regs) +KVM_X86_OP(set_dr6) KVM_X86_OP(set_dr7) KVM_X86_OP(cache_reg) KVM_X86_OP(get_rflags) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b15cde0a9b5c..0b7af5902ff7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1696,6 +1696,7 @@ struct kvm_x86_ops { void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); + void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 9a71880eec07..72765b2fe0d8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -395,7 +395,8 @@ #define MSR_IA32_PASID_VALID BIT_ULL(31) /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 1ac79f361645..0ba8d20f2d1d 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -188,11 +188,33 @@ union cpuid10_edx { * detection/enumeration details: */ #define ARCH_PERFMON_EXT_LEAF 0x00000023 -#define ARCH_PERFMON_EXT_UMASK2 0x1 -#define ARCH_PERFMON_EXT_EQ 0x2 -#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1 #define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1 +union cpuid35_eax { + struct { + unsigned int leaf0:1; + /* Counters Sub-Leaf */ + unsigned int cntr_subleaf:1; + /* Auto Counter Reload Sub-Leaf */ + unsigned int acr_subleaf:1; + /* Events Sub-Leaf */ + unsigned int events_subleaf:1; + unsigned int reserved:28; + } split; + unsigned int full; +}; + +union cpuid35_ebx { + struct { + /* UnitMask2 Supported */ + unsigned int umask2:1; + /* EQ-bit Supported */ + unsigned int eq:1; + unsigned int reserved:30; + } split; + unsigned int full; +}; + /* * Intel Architectural LBR CPUID detection/enumeration details: */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 5d9685f92e5c..1581246491b5 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -531,6 +531,7 @@ static inline void __init snp_secure_tsc_init(void) { } #ifdef CONFIG_KVM_AMD_SEV bool snp_probe_rmptable_info(void); +int snp_rmptable_init(void); int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level); void snp_dump_hva_rmpentry(unsigned long address); int psmash(u64 pfn); @@ -541,6 +542,7 @@ void kdump_sev_callback(void); void snp_fixup_e820_tables(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } +static inline int snp_rmptable_init(void) { return -ENOSYS; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } static inline void snp_dump_hva_rmpentry(unsigned long address) {} static inline int psmash(u64 pfn) { return -ENODEV; } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5a505aa65489..a5d0998d7604 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1115,6 +1115,8 @@ do_cmd_auto: case RETBLEED_MITIGATION_IBPB: setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + mitigate_smt = true; /* * IBPB on entry already obviates the need for @@ -1124,9 +1126,6 @@ do_cmd_auto: setup_clear_cpu_cap(X86_FEATURE_UNRET); setup_clear_cpu_cap(X86_FEATURE_RETHUNK); - setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); - mitigate_smt = true; - /* * There is no need for RSB filling: entry_ibpb() ensures * all predictions, including the RSB, are invalidated, @@ -2646,6 +2645,7 @@ static void __init srso_select_mitigation(void) if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) { if (has_microcode) { setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); srso_mitigation = SRSO_MITIGATION_IBPB; /* @@ -2655,6 +2655,13 @@ static void __init srso_select_mitigation(void) */ setup_clear_cpu_cap(X86_FEATURE_UNRET); setup_clear_cpu_cap(X86_FEATURE_RETHUNK); + + /* + * There is no need for RSB filling: entry_ibpb() ensures + * all predictions, including the RSB, are invalidated, + * regardless of IBPB implementation. + */ + setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); } } else { pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); @@ -2663,8 +2670,8 @@ static void __init srso_select_mitigation(void) ibpb_on_vmexit: case SRSO_CMD_IBPB_ON_VMEXIT: - if (IS_ENABLED(CONFIG_MITIGATION_SRSO)) { - if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { + if (IS_ENABLED(CONFIG_MITIGATION_IBPB_ENTRY)) { + if (has_microcode) { setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; @@ -2676,8 +2683,8 @@ ibpb_on_vmexit: setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT); } } else { - pr_err("WARNING: kernel not compiled with MITIGATION_SRSO.\n"); - } + pr_err("WARNING: kernel not compiled with MITIGATION_IBPB_ENTRY.\n"); + } break; default: break; diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index 8bd84114c2d9..df838e3bdbe0 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -45,6 +45,7 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AES, X86_FEATURE_XMM2 }, { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, { X86_FEATURE_GFNI, X86_FEATURE_XMM2 }, + { X86_FEATURE_AVX_VNNI, X86_FEATURE_AVX }, { X86_FEATURE_FMA, X86_FEATURE_AVX }, { X86_FEATURE_VAES, X86_FEATURE_AVX }, { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX }, diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 9651275aecd1..dfec2c61e354 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -153,8 +153,8 @@ static void geode_configure(void) u8 ccr3; local_irq_save(flags); - /* Suspend on halt power saving and enable #SUSP pin */ - setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); + /* Suspend on halt power saving */ + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x08); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 59d23cdf4ed0..dd8748c45529 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -2,6 +2,7 @@ /* * Architecture specific OF callbacks. */ +#include <linux/acpi.h> #include <linux/export.h> #include <linux/io.h> #include <linux/interrupt.h> @@ -313,6 +314,6 @@ void __init x86_flattree_get_config(void) if (initial_dtb) early_memunmap(dt, map_len); #endif - if (of_have_populated_dt()) + if (acpi_disabled && of_have_populated_dt()) x86_init.mpparse.parse_smp_cfg = x86_dtb_parse_smp_config; } diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 385e3a5fc304..feca4f20b06a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -25,8 +25,10 @@ #include <asm/posted_intr.h> #include <asm/irq_remapping.h> +#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_THERMAL_VECTOR) #define CREATE_TRACE_POINTS #include <asm/trace/irq_vectors.h> +#endif DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 2cbb3874ad39..8eb3a88707f2 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -1180,7 +1180,7 @@ void kvm_set_cpu_caps(void) SYNTHESIZED_F(SBPB), SYNTHESIZED_F(IBPB_BRTYPE), SYNTHESIZED_F(SRSO_NO), - SYNTHESIZED_F(SRSO_USER_KERNEL_NO), + F(SRSO_USER_KERNEL_NO), ); kvm_cpu_cap_init(CPUID_8000_0022_EAX, diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6a6dd5a84f22..6ebeb6cea6c0 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2226,6 +2226,9 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) u32 vector; bool all_cpus; + if (!lapic_in_kernel(vcpu)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (hc->code == HVCALL_SEND_IPI) { if (!hc->fast) { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi, @@ -2852,7 +2855,8 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED; ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED; - ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; + if (!vcpu || lapic_in_kernel(vcpu)) + ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; if (evmcs_ver) ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index a45ae60e84ab..8160870398b9 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5540,7 +5540,7 @@ void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, union kvm_mmu_page_role root_role; /* NPT requires CR0.PG=1. */ - WARN_ON_ONCE(cpu_role.base.direct); + WARN_ON_ONCE(cpu_role.base.direct || !cpu_role.base.guest_mode); root_role = cpu_role.base; root_role.level = kvm_mmu_get_tdp_level(vcpu); @@ -7120,6 +7120,19 @@ static void mmu_destroy_caches(void) kmem_cache_destroy(mmu_page_header_cache); } +static void kvm_wake_nx_recovery_thread(struct kvm *kvm) +{ + /* + * The NX recovery thread is spawned on-demand at the first KVM_RUN and + * may not be valid even though the VM is globally visible. Do nothing, + * as such a VM can't have any possible NX huge pages. + */ + struct vhost_task *nx_thread = READ_ONCE(kvm->arch.nx_huge_page_recovery_thread); + + if (nx_thread) + vhost_task_wake(nx_thread); +} + static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp) { if (nx_hugepage_mitigation_hard_disabled) @@ -7180,7 +7193,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) kvm_mmu_zap_all_fast(kvm); mutex_unlock(&kvm->slots_lock); - vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread); + kvm_wake_nx_recovery_thread(kvm); } mutex_unlock(&kvm_lock); } @@ -7315,7 +7328,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) - vhost_task_wake(kvm->arch.nx_huge_page_recovery_thread); + kvm_wake_nx_recovery_thread(kvm); mutex_unlock(&kvm_lock); } @@ -7447,18 +7460,25 @@ static bool kvm_nx_huge_page_recovery_worker(void *data) return true; } -static void kvm_mmu_start_lpage_recovery(struct once *once) +static int kvm_mmu_start_lpage_recovery(struct once *once) { struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once); struct kvm *kvm = container_of(ka, struct kvm, arch); + struct vhost_task *nx_thread; kvm->arch.nx_huge_page_last = get_jiffies_64(); - kvm->arch.nx_huge_page_recovery_thread = vhost_task_create( - kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill, - kvm, "kvm-nx-lpage-recovery"); + nx_thread = vhost_task_create(kvm_nx_huge_page_recovery_worker, + kvm_nx_huge_page_recovery_worker_kill, + kvm, "kvm-nx-lpage-recovery"); - if (kvm->arch.nx_huge_page_recovery_thread) - vhost_task_start(kvm->arch.nx_huge_page_recovery_thread); + if (IS_ERR(nx_thread)) + return PTR_ERR(nx_thread); + + vhost_task_start(nx_thread); + + /* Make the task visible only once it is fully started. */ + WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread); + return 0; } int kvm_mmu_post_init_vm(struct kvm *kvm) @@ -7466,10 +7486,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm) if (nx_hugepage_mitigation_hard_disabled) return 0; - call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); - if (!kvm->arch.nx_huge_page_recovery_thread) - return -ENOMEM; - return 0; + return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); } void kvm_mmu_pre_destroy_vm(struct kvm *kvm) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index d77b094d9a4d..04c375bf1ac2 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -646,6 +646,11 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, u32 pause_count12; u32 pause_thresh12; + nested_svm_transition_tlb_flush(vcpu); + + /* Enter Guest-Mode */ + enter_guest_mode(vcpu); + /* * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes. @@ -762,11 +767,6 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm, } } - nested_svm_transition_tlb_flush(vcpu); - - /* Enter Guest-Mode */ - enter_guest_mode(vcpu); - /* * Merge guest and host intercepts - must be called with vcpu in * guest-mode to take effect. diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a2a794c32050..0dbb25442ec1 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2972,6 +2972,16 @@ void __init sev_hardware_setup(void) WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_FLUSHBYASID))) goto out; + /* + * The kernel's initcall infrastructure lacks the ability to express + * dependencies between initcalls, whereas the modules infrastructure + * automatically handles dependencies via symbol loading. Ensure the + * PSP SEV driver is initialized before proceeding if KVM is built-in, + * as the dependency isn't handled by the initcall infrastructure. + */ + if (IS_BUILTIN(CONFIG_KVM_AMD) && sev_module_init()) + goto out; + /* Retrieve SEV CPUID information */ cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 7640a84e554a..a713c803a3a3 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1991,11 +1991,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) svm->asid = sd->next_asid++; } -static void svm_set_dr6(struct vcpu_svm *svm, unsigned long value) +static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) { - struct vmcb *vmcb = svm->vmcb; + struct vmcb *vmcb = to_svm(vcpu)->vmcb; - if (svm->vcpu.arch.guest_state_protected) + if (vcpu->arch.guest_state_protected) return; if (unlikely(value != vmcb->save.dr6)) { @@ -4247,10 +4247,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu, * Run with all-zero DR6 unless needed, so that we can get the exact cause * of a #DB. */ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - svm_set_dr6(svm, vcpu->arch.dr6); - else - svm_set_dr6(svm, DR6_ACTIVE_LOW); + if (likely(!(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT))) + svm_set_dr6(vcpu, DR6_ACTIVE_LOW); clgi(); kvm_load_guest_xsave_state(vcpu); @@ -5043,6 +5041,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, + .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, .cache_reg = svm_cache_reg, diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index 2427f918e763..43ee9ed11291 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -61,6 +61,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, + .set_dr6 = vmx_set_dr6, .set_dr7 = vmx_set_dr7, .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, .cache_reg = vmx_cache_reg, diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 8a7af02d466e..ed8a3cb53961 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -5084,6 +5084,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, load_vmcs12_host_state(vcpu, vmcs12); + /* + * Process events if an injectable IRQ or NMI is pending, even + * if the event is blocked (RFLAGS.IF is cleared on VM-Exit). + * If an event became pending while L2 was active, KVM needs to + * either inject the event or request an IRQ/NMI window. SMIs + * don't need to be processed as SMM is mutually exclusive with + * non-root mode. INIT/SIPI don't need to be checked as INIT + * is blocked post-VMXON, and SIPIs are ignored. + */ + if (kvm_cpu_has_injectable_intr(vcpu) || vcpu->arch.nmi_pending) + kvm_make_request(KVM_REQ_EVENT, vcpu); return; } diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f72835e85b6d..6c56d5235f0f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -5648,6 +5648,12 @@ void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) set_debugreg(DR6_RESERVED, 6); } +void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) +{ + lockdep_assert_irqs_disabled(); + set_debugreg(vcpu->arch.dr6, 6); +} + void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) { vmcs_writel(GUEST_DR7, val); @@ -7417,10 +7423,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit) vmx->loaded_vmcs->host_state.cr4 = cr4; } - /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ - if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) - set_debugreg(vcpu->arch.dr6, 6); - /* When single-stepping over STI and MOV SS, we must clear the * corresponding interruptibility bits in the guest state. Otherwise * vmentry fails as it then expects bit 14 (BS) in pending debug diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index ce3295a67c04..430773a5ef8e 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -73,6 +73,7 @@ void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt); +void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val); void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val); void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu); void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6d4a6734b2d6..6fc4ddc606bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10961,6 +10961,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[1], 1); set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); + /* When KVM_DEBUGREG_WONT_EXIT, dr6 is accessible in guest. */ + if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) + kvm_x86_call(set_dr6)(vcpu, vcpu->arch.dr6); } else if (unlikely(hw_breakpoint_active())) { set_debugreg(0, 7); } @@ -12741,6 +12744,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) "does not run without ignore_msrs=1, please report it to kvm@vger.kernel.org.\n"); } + once_init(&kvm->arch.nx_once); return 0; out_uninit_mmu: @@ -12750,12 +12754,6 @@ out: return ret; } -int kvm_arch_post_init_vm(struct kvm *kvm) -{ - once_init(&kvm->arch.nx_once); - return 0; -} - static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { vcpu_load(vcpu); @@ -12879,11 +12877,11 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->slots_lock); } kvm_unload_vcpu_mmus(kvm); + kvm_destroy_vcpus(kvm); kvm_x86_call(vm_destroy)(kvm); kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); - kvm_destroy_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1)); kvm_mmu_uninit_vm(kvm); diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c index 76eaeb93928c..eb1cdadc8a61 100644 --- a/arch/x86/um/os-Linux/registers.c +++ b/arch/x86/um/os-Linux/registers.c @@ -18,6 +18,7 @@ #include <registers.h> #include <sys/mman.h> +static unsigned long ptrace_regset; unsigned long host_fp_size; int get_fp_registers(int pid, unsigned long *regs) @@ -27,7 +28,7 @@ int get_fp_registers(int pid, unsigned long *regs) .iov_len = host_fp_size, }; - if (ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov) < 0) + if (ptrace(PTRACE_GETREGSET, pid, ptrace_regset, &iov) < 0) return -errno; return 0; } @@ -39,7 +40,7 @@ int put_fp_registers(int pid, unsigned long *regs) .iov_len = host_fp_size, }; - if (ptrace(PTRACE_SETREGSET, pid, NT_X86_XSTATE, &iov) < 0) + if (ptrace(PTRACE_SETREGSET, pid, ptrace_regset, &iov) < 0) return -errno; return 0; } @@ -58,9 +59,23 @@ int arch_init_registers(int pid) return -ENOMEM; /* GDB has x86_xsave_length, which uses x86_cpuid_count */ - ret = ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov); + ptrace_regset = NT_X86_XSTATE; + ret = ptrace(PTRACE_GETREGSET, pid, ptrace_regset, &iov); if (ret) ret = -errno; + + if (ret == -ENODEV) { +#ifdef CONFIG_X86_32 + ptrace_regset = NT_PRXFPREG; +#else + ptrace_regset = NT_PRFPREG; +#endif + iov.iov_len = 2 * 1024 * 1024; + ret = ptrace(PTRACE_GETREGSET, pid, ptrace_regset, &iov); + if (ret) + ret = -errno; + } + munmap(iov.iov_base, 2 * 1024 * 1024); host_fp_size = iov.iov_len; diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 75087e85b6fd..2934e170b0fe 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -187,7 +187,12 @@ static int copy_sc_to_user(struct sigcontext __user *to, * Put magic/size values for userspace. We do not bother to verify them * later on, however, userspace needs them should it try to read the * XSTATE data. And ptrace does not fill in these parts. + * + * Skip this if we do not have an XSTATE frame. */ + if (host_fp_size <= sizeof(to_fp64->fpstate)) + return 0; + BUILD_BUG_ON(sizeof(int) != FP_XSTATE_MAGIC2_SIZE); #ifdef CONFIG_X86_32 __put_user(offsetof(struct _fpstate_32, _fxsr_env) + @@ -367,11 +372,13 @@ int setup_signal_stack_si(unsigned long stack_top, struct ksignal *ksig, int err = 0, sig = ksig->sig; unsigned long fp_to; - frame = (struct rt_sigframe __user *) - round_down(stack_top - sizeof(struct rt_sigframe), 16); + frame = (void __user *)stack_top - sizeof(struct rt_sigframe); /* Add required space for math frame */ - frame = (struct rt_sigframe __user *)((unsigned long)frame - math_size); + frame = (void __user *)((unsigned long)frame - math_size); + + /* ABI requires 16 byte boundary alignment */ + frame = (void __user *)round_down((unsigned long)frame, 16); /* Subtract 128 for a red zone and 8 for proper alignment */ frame = (struct rt_sigframe __user *) ((unsigned long) frame - 128 - 8); diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 1dcc027ec77e..42e74a5a7d78 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -505,19 +505,19 @@ static bool __init setup_rmptable(void) * described in the SNP_INIT_EX firmware command description in the SNP * firmware ABI spec. */ -static int __init snp_rmptable_init(void) +int __init snp_rmptable_init(void) { unsigned int i; u64 val; - if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) - return 0; + if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP))) + return -ENOSYS; - if (!amd_iommu_snp_en) - goto nosnp; + if (WARN_ON_ONCE(!amd_iommu_snp_en)) + return -ENOSYS; if (!setup_rmptable()) - goto nosnp; + return -ENOSYS; /* * Check if SEV-SNP is already enabled, this can happen in case of @@ -530,7 +530,7 @@ static int __init snp_rmptable_init(void) /* Zero out the RMP bookkeeping area */ if (!clear_rmptable_bookkeeping()) { free_rmp_segment_table(); - goto nosnp; + return -ENOSYS; } /* Zero out the RMP entries */ @@ -562,17 +562,8 @@ skip_enable: crash_kexec_post_notifiers = true; return 0; - -nosnp: - cc_platform_clear(CC_ATTR_HOST_SEV_SNP); - return -ENOSYS; } -/* - * This must be called after the IOMMU has been initialized. - */ -device_initcall(snp_rmptable_init); - static void set_rmp_segment_info(unsigned int segment_shift) { rmp_segment_shift = segment_shift; diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 2c70cd35e72c..d078de2c952b 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -111,6 +111,51 @@ static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; */ static DEFINE_SPINLOCK(xen_reservation_lock); +/* Protected by xen_reservation_lock. */ +#define MIN_CONTIG_ORDER 9 /* 2MB */ +static unsigned int discontig_frames_order = MIN_CONTIG_ORDER; +static unsigned long discontig_frames_early[1UL << MIN_CONTIG_ORDER] __initdata; +static unsigned long *discontig_frames __refdata = discontig_frames_early; +static bool discontig_frames_dyn; + +static int alloc_discontig_frames(unsigned int order) +{ + unsigned long *new_array, *old_array; + unsigned int old_order; + unsigned long flags; + + BUG_ON(order < MIN_CONTIG_ORDER); + BUILD_BUG_ON(sizeof(discontig_frames_early) != PAGE_SIZE); + + new_array = (unsigned long *)__get_free_pages(GFP_KERNEL, + order - MIN_CONTIG_ORDER); + if (!new_array) + return -ENOMEM; + + spin_lock_irqsave(&xen_reservation_lock, flags); + + old_order = discontig_frames_order; + + if (order > discontig_frames_order || !discontig_frames_dyn) { + if (!discontig_frames_dyn) + old_array = NULL; + else + old_array = discontig_frames; + + discontig_frames = new_array; + discontig_frames_order = order; + discontig_frames_dyn = true; + } else { + old_array = new_array; + } + + spin_unlock_irqrestore(&xen_reservation_lock, flags); + + free_pages((unsigned long)old_array, old_order - MIN_CONTIG_ORDER); + + return 0; +} + /* * Note about cr3 (pagetable base) values: * @@ -814,6 +859,9 @@ static void __init xen_after_bootmem(void) SetPagePinned(virt_to_page(level3_user_vsyscall)); #endif xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); + + if (alloc_discontig_frames(MIN_CONTIG_ORDER)) + BUG(); } static void xen_unpin_page(struct mm_struct *mm, struct page *page, @@ -2203,10 +2251,6 @@ void __init xen_init_mmu_ops(void) memset(dummy_mapping, 0xff, PAGE_SIZE); } -/* Protected by xen_reservation_lock. */ -#define MAX_CONTIG_ORDER 9 /* 2MB */ -static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER]; - #define VOID_PTE (mfn_pte(0, __pgprot(0))) static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, unsigned long *in_frames, @@ -2323,18 +2367,25 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, unsigned int address_bits, dma_addr_t *dma_handle) { - unsigned long *in_frames = discontig_frames, out_frame; + unsigned long *in_frames, out_frame; unsigned long flags; int success; unsigned long vstart = (unsigned long)phys_to_virt(pstart); - if (unlikely(order > MAX_CONTIG_ORDER)) - return -ENOMEM; + if (unlikely(order > discontig_frames_order)) { + if (!discontig_frames_dyn) + return -ENOMEM; + + if (alloc_discontig_frames(order)) + return -ENOMEM; + } memset((void *) vstart, 0, PAGE_SIZE << order); spin_lock_irqsave(&xen_reservation_lock, flags); + in_frames = discontig_frames; + /* 1. Zap current PTEs, remembering MFNs. */ xen_zap_pfn_range(vstart, order, in_frames, NULL); @@ -2358,12 +2409,12 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) { - unsigned long *out_frames = discontig_frames, in_frame; + unsigned long *out_frames, in_frame; unsigned long flags; int success; unsigned long vstart; - if (unlikely(order > MAX_CONTIG_ORDER)) + if (unlikely(order > discontig_frames_order)) return; vstart = (unsigned long)phys_to_virt(pstart); @@ -2371,6 +2422,8 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) spin_lock_irqsave(&xen_reservation_lock, flags); + out_frames = discontig_frames; + /* 1. Find start MFN of contiguous extent. */ in_frame = virt_to_mfn((void *)vstart); diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 9252652afe59..894edf8d6d62 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -100,9 +100,6 @@ SYM_FUNC_START(xen_hypercall_hvm) push %r10 push %r9 push %r8 -#ifdef CONFIG_FRAME_POINTER - pushq $0 /* Dummy push for stack alignment. */ -#endif #endif /* Set the vendor specific function. */ call __xen_hypercall_setfunc @@ -117,11 +114,8 @@ SYM_FUNC_START(xen_hypercall_hvm) pop %ebx pop %eax #else - lea xen_hypercall_amd(%rip), %rbx - cmp %rax, %rbx -#ifdef CONFIG_FRAME_POINTER - pop %rax /* Dummy pop. */ -#endif + lea xen_hypercall_amd(%rip), %rcx + cmp %rax, %rcx pop %r8 pop %r9 pop %r10 @@ -132,6 +126,7 @@ SYM_FUNC_START(xen_hypercall_hvm) pop %rcx pop %rax #endif + FRAME_END /* Use correct hypercall function. */ jz xen_hypercall_amd jmp xen_hypercall_intel diff --git a/block/bio.c b/block/bio.c index f0c416e5931d..6ac5983ba51e 100644 --- a/block/bio.c +++ b/block/bio.c @@ -77,7 +77,7 @@ struct bio_slab { struct kmem_cache *slab; unsigned int slab_ref; unsigned int slab_size; - char name[8]; + char name[12]; }; static DEFINE_MUTEX(bio_slab_lock); static DEFINE_XARRAY(bio_slabs); diff --git a/block/blk-merge.c b/block/blk-merge.c index 15cd231d560c..1d1589c35297 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -270,7 +270,7 @@ static bool bvec_split_segs(const struct queue_limits *lim, const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes, unsigned max_segs, unsigned max_bytes) { - unsigned max_len = min(max_bytes, UINT_MAX) - *bytes; + unsigned max_len = max_bytes - *bytes; unsigned len = min(bv->bv_len, max_len); unsigned total_len = 0; unsigned seg_size = 0; @@ -329,7 +329,7 @@ int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim, if (nsegs < lim->max_segments && bytes + bv.bv_len <= max_bytes && - bv.bv_offset + bv.bv_len <= PAGE_SIZE) { + bv.bv_offset + bv.bv_len <= lim->min_segment_size) { nsegs++; bytes += bv.bv_len; } else { @@ -556,11 +556,14 @@ int __blk_rq_map_sg(struct request_queue *q, struct request *rq, { struct req_iterator iter = { .bio = rq->bio, - .iter = rq->bio->bi_iter, }; struct phys_vec vec; int nsegs = 0; + /* the internal flush request may not have bio attached */ + if (iter.bio) + iter.iter = iter.bio->bi_iter; + while (blk_map_iter_next(rq, &iter, &vec)) { *last_sg = blk_next_sg(last_sg, sglist); sg_set_page(*last_sg, phys_to_page(vec.paddr), vec.len, diff --git a/block/blk-settings.c b/block/blk-settings.c index c44dadc35e1e..b9c6f0ec1c49 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -246,6 +246,7 @@ int blk_validate_limits(struct queue_limits *lim) { unsigned int max_hw_sectors; unsigned int logical_block_sectors; + unsigned long seg_size; int err; /* @@ -303,7 +304,7 @@ int blk_validate_limits(struct queue_limits *lim) max_hw_sectors = min_not_zero(lim->max_hw_sectors, lim->max_dev_sectors); if (lim->max_user_sectors) { - if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE) + if (lim->max_user_sectors < BLK_MIN_SEGMENT_SIZE / SECTOR_SIZE) return -EINVAL; lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors); } else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) { @@ -341,7 +342,7 @@ int blk_validate_limits(struct queue_limits *lim) */ if (!lim->seg_boundary_mask) lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; - if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1)) + if (WARN_ON_ONCE(lim->seg_boundary_mask < BLK_MIN_SEGMENT_SIZE - 1)) return -EINVAL; /* @@ -362,10 +363,17 @@ int blk_validate_limits(struct queue_limits *lim) */ if (!lim->max_segment_size) lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; - if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE)) + if (WARN_ON_ONCE(lim->max_segment_size < BLK_MIN_SEGMENT_SIZE)) return -EINVAL; } + /* setup min segment size for building new segment in fast path */ + if (lim->seg_boundary_mask > lim->max_segment_size - 1) + seg_size = lim->max_segment_size; + else + seg_size = lim->seg_boundary_mask + 1; + lim->min_segment_size = min_t(unsigned int, seg_size, PAGE_SIZE); + /* * We require drivers to at least do logical block aligned I/O, but * historically could not check for that due to the separate calls diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 761ea662ddc3..0c77244a35c9 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -410,13 +410,14 @@ static bool disk_insert_zone_wplug(struct gendisk *disk, } } hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]); + atomic_inc(&disk->nr_zone_wplugs); spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); return true; } -static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, - sector_t sector) +static struct blk_zone_wplug *disk_get_hashed_zone_wplug(struct gendisk *disk, + sector_t sector) { unsigned int zno = disk_zone_no(disk, sector); unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits); @@ -437,6 +438,15 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, return NULL; } +static inline struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk, + sector_t sector) +{ + if (!atomic_read(&disk->nr_zone_wplugs)) + return NULL; + + return disk_get_hashed_zone_wplug(disk, sector); +} + static void disk_free_zone_wplug_rcu(struct rcu_head *rcu_head) { struct blk_zone_wplug *zwplug = @@ -503,6 +513,7 @@ static void disk_remove_zone_wplug(struct gendisk *disk, zwplug->flags |= BLK_ZONE_WPLUG_UNHASHED; spin_lock_irqsave(&disk->zone_wplugs_lock, flags); hlist_del_init_rcu(&zwplug->node); + atomic_dec(&disk->nr_zone_wplugs); spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags); disk_put_zone_wplug(zwplug); } @@ -593,6 +604,11 @@ static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug) { struct bio *bio; + if (bio_list_empty(&zwplug->bio_list)) + return; + + pr_warn_ratelimited("%s: zone %u: Aborting plugged BIOs\n", + zwplug->disk->disk_name, zwplug->zone_no); while ((bio = bio_list_pop(&zwplug->bio_list))) blk_zone_wplug_bio_io_error(zwplug, bio); } @@ -1040,6 +1056,47 @@ plug: return true; } +static void blk_zone_wplug_handle_native_zone_append(struct bio *bio) +{ + struct gendisk *disk = bio->bi_bdev->bd_disk; + struct blk_zone_wplug *zwplug; + unsigned long flags; + + /* + * We have native support for zone append operations, so we are not + * going to handle @bio through plugging. However, we may already have a + * zone write plug for the target zone if that zone was previously + * partially written using regular writes. In such case, we risk leaving + * the plug in the disk hash table if the zone is fully written using + * zone append operations. Avoid this by removing the zone write plug. + */ + zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector); + if (likely(!zwplug)) + return; + + spin_lock_irqsave(&zwplug->lock, flags); + + /* + * We are about to remove the zone write plug. But if the user + * (mistakenly) has issued regular writes together with native zone + * append, we must aborts the writes as otherwise the plugged BIOs would + * not be executed by the plug BIO work as disk_get_zone_wplug() will + * return NULL after the plug is removed. Aborting the plugged write + * BIOs is consistent with the fact that these writes will most likely + * fail anyway as there is no ordering guarantees between zone append + * operations and regular write operations. + */ + if (!bio_list_empty(&zwplug->bio_list)) { + pr_warn_ratelimited("%s: zone %u: Invalid mix of zone append and regular writes\n", + disk->disk_name, zwplug->zone_no); + disk_zone_wplug_abort(zwplug); + } + disk_remove_zone_wplug(disk, zwplug); + spin_unlock_irqrestore(&zwplug->lock, flags); + + disk_put_zone_wplug(zwplug); +} + /** * blk_zone_plug_bio - Handle a zone write BIO with zone write plugging * @bio: The BIO being submitted @@ -1096,8 +1153,10 @@ bool blk_zone_plug_bio(struct bio *bio, unsigned int nr_segs) */ switch (bio_op(bio)) { case REQ_OP_ZONE_APPEND: - if (!bdev_emulates_zone_append(bdev)) + if (!bdev_emulates_zone_append(bdev)) { + blk_zone_wplug_handle_native_zone_append(bio); return false; + } fallthrough; case REQ_OP_WRITE: case REQ_OP_WRITE_ZEROES: @@ -1284,6 +1343,7 @@ static int disk_alloc_zone_resources(struct gendisk *disk, { unsigned int i; + atomic_set(&disk->nr_zone_wplugs, 0); disk->zone_wplugs_hash_bits = min(ilog2(pool_size) + 1, BLK_ZONE_WPLUG_MAX_HASH_BITS); @@ -1338,6 +1398,7 @@ static void disk_destroy_zone_wplugs_hash_table(struct gendisk *disk) } } + WARN_ON_ONCE(atomic_read(&disk->nr_zone_wplugs)); kfree(disk->zone_wplugs_hash); disk->zone_wplugs_hash = NULL; disk->zone_wplugs_hash_bits = 0; @@ -1550,11 +1611,12 @@ static int blk_revalidate_seq_zone(struct blk_zone *zone, unsigned int idx, } /* - * We need to track the write pointer of all zones that are not - * empty nor full. So make sure we have a zone write plug for - * such zone if the device has a zone write plug hash table. + * If the device needs zone append emulation, we need to track the + * write pointer of all zones that are not empty nor full. So make sure + * we have a zone write plug for such zone if the device has a zone + * write plug hash table. */ - if (!disk->zone_wplugs_hash) + if (!queue_emulates_zone_append(disk->queue) || !disk->zone_wplugs_hash) return 0; disk_zone_wplug_sync_wp_offset(disk, zone); diff --git a/block/blk.h b/block/blk.h index 90fa5f28ccab..9cf9a0099416 100644 --- a/block/blk.h +++ b/block/blk.h @@ -14,6 +14,7 @@ struct elevator_type; #define BLK_DEV_MAX_SECTORS (LLONG_MAX >> 9) +#define BLK_MIN_SEGMENT_SIZE 4096 /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) @@ -358,8 +359,12 @@ struct bio *bio_split_zone_append(struct bio *bio, static inline bool bio_may_need_split(struct bio *bio, const struct queue_limits *lim) { - return lim->chunk_sectors || bio->bi_vcnt != 1 || - bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE; + if (lim->chunk_sectors) + return true; + if (bio->bi_vcnt != 1) + return true; + return bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > + lim->min_segment_size; } /** diff --git a/block/partitions/mac.c b/block/partitions/mac.c index c80183156d68..b02530d98629 100644 --- a/block/partitions/mac.c +++ b/block/partitions/mac.c @@ -53,13 +53,25 @@ int mac_partition(struct parsed_partitions *state) } secsize = be16_to_cpu(md->block_size); put_dev_sector(sect); + + /* + * If the "block size" is not a power of 2, things get weird - we might + * end up with a partition straddling a sector boundary, so we wouldn't + * be able to read a partition entry with read_part_sector(). + * Real block sizes are probably (?) powers of two, so just require + * that. + */ + if (!is_power_of_2(secsize)) + return -1; datasize = round_down(secsize, 512); data = read_part_sector(state, datasize / 512, §); if (!data) return -1; partoffset = secsize % 512; - if (partoffset + sizeof(*part) > datasize) + if (partoffset + sizeof(*part) > datasize) { + put_dev_sector(sect); return -1; + } part = (struct mac_partition *) (data + partoffset); if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) { put_dev_sector(sect); @@ -112,8 +124,8 @@ int mac_partition(struct parsed_partitions *state) int i, l; goodness++; - l = strlen(part->name); - if (strcmp(part->name, "/") == 0) + l = strnlen(part->name, sizeof(part->name)); + if (strncmp(part->name, "/", sizeof(part->name)) == 0) goodness++; for (i = 0; i <= l - 4; ++i) { if (strncasecmp(part->name + i, "root", diff --git a/drivers/accel/amdxdna/amdxdna_mailbox.c b/drivers/accel/amdxdna/amdxdna_mailbox.c index 814b16bb1953..e5301fac1397 100644 --- a/drivers/accel/amdxdna/amdxdna_mailbox.c +++ b/drivers/accel/amdxdna/amdxdna_mailbox.c @@ -8,6 +8,7 @@ #include <linux/bitfield.h> #include <linux/interrupt.h> #include <linux/iopoll.h> +#include <linux/slab.h> #include <linux/xarray.h> #define CREATE_TRACE_POINTS diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c index 97d4a032171f..f5b8497cf5ad 100644 --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c @@ -21,6 +21,11 @@ #define AMDXDNA_AUTOSUSPEND_DELAY 5000 /* milliseconds */ +MODULE_FIRMWARE("amdnpu/1502_00/npu.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_10/npu.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_11/npu.sbin"); +MODULE_FIRMWARE("amdnpu/17f0_20/npu.sbin"); + /* * Bind the driver base on (vendor_id, device_id) pair and later use the * (device_id, rev_id) pair as a key to select the devices. The devices with diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index 1e8ffbe25eee..38cf1c342c72 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -397,15 +397,19 @@ int ivpu_boot(struct ivpu_device *vdev) if (ivpu_fw_is_cold_boot(vdev)) { ret = ivpu_pm_dct_init(vdev); if (ret) - goto err_diagnose_failure; + goto err_disable_ipc; ret = ivpu_hw_sched_init(vdev); if (ret) - goto err_diagnose_failure; + goto err_disable_ipc; } return 0; +err_disable_ipc: + ivpu_ipc_disable(vdev); + ivpu_hw_irq_disable(vdev); + disable_irq(vdev->irq); err_diagnose_failure: ivpu_hw_diagnose_failure(vdev); ivpu_mmu_evtq_dump(vdev); diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 87d7411ae059..5060c5dd40d1 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -115,41 +115,57 @@ err_power_down: return ret; } -static void ivpu_pm_recovery_work(struct work_struct *work) +static void ivpu_pm_reset_begin(struct ivpu_device *vdev) { - struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); - struct ivpu_device *vdev = pm->vdev; - char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; - int ret; - - ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); - - ret = pm_runtime_resume_and_get(vdev->drm.dev); - if (ret) - ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); - - ivpu_jsm_state_dump(vdev); - ivpu_dev_coredump(vdev); + pm_runtime_disable(vdev->drm.dev); atomic_inc(&vdev->pm->reset_counter); atomic_set(&vdev->pm->reset_pending, 1); down_write(&vdev->pm->reset_lock); +} + +static void ivpu_pm_reset_complete(struct ivpu_device *vdev) +{ + int ret; - ivpu_suspend(vdev); ivpu_pm_prepare_cold_boot(vdev); ivpu_jobs_abort_all(vdev); ivpu_ms_cleanup_all(vdev); ret = ivpu_resume(vdev); - if (ret) + if (ret) { ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } else { + pm_runtime_set_active(vdev->drm.dev); + } up_write(&vdev->pm->reset_lock); atomic_set(&vdev->pm->reset_pending, 0); - kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); pm_runtime_mark_last_busy(vdev->drm.dev); - pm_runtime_put_autosuspend(vdev->drm.dev); + pm_runtime_enable(vdev->drm.dev); +} + +static void ivpu_pm_recovery_work(struct work_struct *work) +{ + struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, recovery_work); + struct ivpu_device *vdev = pm->vdev; + char *evt[2] = {"IVPU_PM_EVENT=IVPU_RECOVER", NULL}; + + ivpu_err(vdev, "Recovering the NPU (reset #%d)\n", atomic_read(&vdev->pm->reset_counter)); + + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_jsm_state_dump(vdev); + ivpu_dev_coredump(vdev); + ivpu_suspend(vdev); + } + + ivpu_pm_reset_complete(vdev); + + kobject_uevent_env(&vdev->drm.dev->kobj, KOBJ_CHANGE, evt); } void ivpu_pm_trigger_recovery(struct ivpu_device *vdev, const char *reason) @@ -309,7 +325,10 @@ int ivpu_rpm_get(struct ivpu_device *vdev) int ret; ret = pm_runtime_resume_and_get(vdev->drm.dev); - drm_WARN_ON(&vdev->drm, ret < 0); + if (ret < 0) { + ivpu_err(vdev, "Failed to resume NPU: %d\n", ret); + pm_runtime_set_suspended(vdev->drm.dev); + } return ret; } @@ -325,16 +344,13 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) struct ivpu_device *vdev = pci_get_drvdata(pdev); ivpu_dbg(vdev, PM, "Pre-reset..\n"); - atomic_inc(&vdev->pm->reset_counter); - atomic_set(&vdev->pm->reset_pending, 1); - pm_runtime_get_sync(vdev->drm.dev); - down_write(&vdev->pm->reset_lock); - ivpu_prepare_for_reset(vdev); - ivpu_hw_reset(vdev); - ivpu_pm_prepare_cold_boot(vdev); - ivpu_jobs_abort_all(vdev); - ivpu_ms_cleanup_all(vdev); + ivpu_pm_reset_begin(vdev); + + if (!pm_runtime_status_suspended(vdev->drm.dev)) { + ivpu_prepare_for_reset(vdev); + ivpu_hw_reset(vdev); + } ivpu_dbg(vdev, PM, "Pre-reset done.\n"); } @@ -342,18 +358,12 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) void ivpu_pm_reset_done_cb(struct pci_dev *pdev) { struct ivpu_device *vdev = pci_get_drvdata(pdev); - int ret; ivpu_dbg(vdev, PM, "Post-reset..\n"); - ret = ivpu_resume(vdev); - if (ret) - ivpu_err(vdev, "Failed to set RESUME state: %d\n", ret); - up_write(&vdev->pm->reset_lock); - atomic_set(&vdev->pm->reset_pending, 0); - ivpu_dbg(vdev, PM, "Post-reset done.\n"); - pm_runtime_mark_last_busy(vdev->drm.dev); - pm_runtime_put_autosuspend(vdev->drm.dev); + ivpu_pm_reset_complete(vdev); + + ivpu_dbg(vdev, PM, "Post-reset done.\n"); } void ivpu_pm_init(struct ivpu_device *vdev) diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c index 3561553eff8b..70f8290b659d 100644 --- a/drivers/acpi/arm64/gtdt.c +++ b/drivers/acpi/arm64/gtdt.c @@ -163,7 +163,7 @@ int __init acpi_gtdt_init(struct acpi_table_header *table, { void *platform_timer; struct acpi_table_gtdt *gtdt; - int cnt = 0; + u32 cnt = 0; gtdt = container_of(table, struct acpi_table_gtdt, header); acpi_gtdt_desc.gtdt = gtdt; @@ -188,13 +188,17 @@ int __init acpi_gtdt_init(struct acpi_table_header *table, cnt++; if (cnt != gtdt->platform_timer_count) { + cnt = min(cnt, gtdt->platform_timer_count); + pr_err(FW_BUG "limiting Platform Timer count to %d\n", cnt); + } + + if (!cnt) { acpi_gtdt_desc.platform_timer = NULL; - pr_err(FW_BUG "invalid timer data.\n"); - return -EINVAL; + return 0; } if (platform_timer_count) - *platform_timer_count = gtdt->platform_timer_count; + *platform_timer_count = cnt; return 0; } diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index fc92e43d0fe9..2ad53cc6aae5 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -417,8 +417,14 @@ static int profile_class_registered(struct device *dev, const void *data) static umode_t profile_class_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { - if (!class_find_device(&platform_profile_class, NULL, NULL, profile_class_registered)) + struct device *dev; + + dev = class_find_device(&platform_profile_class, NULL, NULL, profile_class_registered); + if (!dev) return 0; + + put_device(dev); + return attr->mode; } diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c index 747f83f7114d..e549914a636c 100644 --- a/drivers/acpi/prmt.c +++ b/drivers/acpi/prmt.c @@ -287,9 +287,7 @@ static acpi_status acpi_platformrt_space_handler(u32 function, if (!handler || !module) goto invalid_guid; - if (!handler->handler_addr || - !handler->static_data_buffer_addr || - !handler->acpi_param_buffer_addr) { + if (!handler->handler_addr) { buffer->prm_status = PRM_HANDLER_ERROR; return AE_OK; } diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 98d93ed58315..436019d96027 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1187,8 +1187,6 @@ static int acpi_data_prop_read(const struct acpi_device_data *data, } break; } - if (nval == 0) - return -EINVAL; if (obj->type == ACPI_TYPE_BUFFER) { if (proptype != DEV_PROP_U8) @@ -1212,9 +1210,11 @@ static int acpi_data_prop_read(const struct acpi_device_data *data, ret = acpi_copy_property_array_uint(items, (u64 *)val, nval); break; case DEV_PROP_STRING: - ret = acpi_copy_property_array_string( - items, (char **)val, - min_t(u32, nval, obj->package.count)); + nval = min_t(u32, nval, obj->package.count); + if (nval == 0) + return -ENODATA; + + ret = acpi_copy_property_array_string(items, (char **)val, nval); break; default: ret = -EINVAL; diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 90aaec923889..b4cd14e7fa76 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -564,6 +564,12 @@ static const struct dmi_system_id irq1_edge_low_force_override[] = { }, }, { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Eluktronics Inc."), + DMI_MATCH(DMI_BOARD_NAME, "MECH-17"), + }, + }, + { /* TongFang GM6XGxX/TUXEDO Stellaris 16 Gen5 AMD */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "GM6XGxX"), diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 8e895ae45c86..c842e2de6ef9 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -386,8 +386,12 @@ struct ahci_host_priv { static inline bool ahci_ignore_port(struct ahci_host_priv *hpriv, unsigned int portid) { - return portid >= hpriv->nports || - !(hpriv->mask_port_map & (1 << portid)); + if (portid >= hpriv->nports) + return true; + /* mask_port_map not set means that all ports are available */ + if (!hpriv->mask_port_map) + return false; + return !(hpriv->mask_port_map & (1 << portid)); } extern int ahci_ignore_sss; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index fdfa7b266218..e7ace4b10f15 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -541,6 +541,7 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) hpriv->saved_port_map = port_map; } + /* mask_port_map not set means that all ports are available */ if (hpriv->mask_port_map) { dev_warn(dev, "masking port_map 0x%lx -> 0x%lx\n", port_map, diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 53b2c7719dc5..91d44302eac9 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -651,8 +651,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, * If no sub-node was found, keep this for device tree * compatibility */ - hpriv->mask_port_map |= BIT(0); - rc = ahci_platform_get_phy(hpriv, 0, dev, dev->of_node); if (rc) goto err_out; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 63ec2f218431..c085dd81ebe7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4143,10 +4143,6 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "Samsung SSD 860*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, - { "Samsung SSD 870 QVO*", NULL, ATA_QUIRK_NO_NCQ_TRIM | - ATA_QUIRK_ZERO_AFTER_TRIM | - ATA_QUIRK_NO_NCQ_ON_ATI | - ATA_QUIRK_NOLPM }, { "Samsung SSD 870*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 7fb21768ca36..8074a10183dc 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -6,7 +6,7 @@ obj-y := component.o core.o bus.o dd.o syscore.o \ cpu.o firmware.o init.o map.o devres.o \ attribute_container.o transport_class.o \ topology.o container.o property.o cacheinfo.o \ - swnode.o + swnode.o faux.o obj-$(CONFIG_AUXILIARY_BUS) += auxiliary.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o obj-y += power/ diff --git a/drivers/base/base.h b/drivers/base/base.h index 8cf04a557bdb..0042e4774b0c 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -137,6 +137,7 @@ int hypervisor_init(void); static inline int hypervisor_init(void) { return 0; } #endif int platform_bus_init(void); +int faux_bus_init(void); void cpu_dev_init(void); void container_dev_init(void); #ifdef CONFIG_AUXILIARY_BUS diff --git a/drivers/base/faux.c b/drivers/base/faux.c new file mode 100644 index 000000000000..531e9d789ee0 --- /dev/null +++ b/drivers/base/faux.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2025 Greg Kroah-Hartman <gregkh@linuxfoundation.org> + * Copyright (c) 2025 The Linux Foundation + * + * A "simple" faux bus that allows devices to be created and added + * automatically to it. This is to be used whenever you need to create a + * device that is not associated with any "real" system resources, and do + * not want to have to deal with a bus/driver binding logic. It is + * intended to be very simple, with only a create and a destroy function + * available. + */ +#include <linux/err.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/container_of.h> +#include <linux/device/faux.h> +#include "base.h" + +/* + * Internal wrapper structure so we can hold a pointer to the + * faux_device_ops for this device. + */ +struct faux_object { + struct faux_device faux_dev; + const struct faux_device_ops *faux_ops; +}; +#define to_faux_object(dev) container_of_const(dev, struct faux_object, faux_dev.dev) + +static struct device faux_bus_root = { + .init_name = "faux", +}; + +static int faux_match(struct device *dev, const struct device_driver *drv) +{ + /* Match always succeeds, we only have one driver */ + return 1; +} + +static int faux_probe(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + struct faux_device *faux_dev = &faux_obj->faux_dev; + const struct faux_device_ops *faux_ops = faux_obj->faux_ops; + int ret = 0; + + if (faux_ops && faux_ops->probe) + ret = faux_ops->probe(faux_dev); + + return ret; +} + +static void faux_remove(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + struct faux_device *faux_dev = &faux_obj->faux_dev; + const struct faux_device_ops *faux_ops = faux_obj->faux_ops; + + if (faux_ops && faux_ops->remove) + faux_ops->remove(faux_dev); +} + +static const struct bus_type faux_bus_type = { + .name = "faux", + .match = faux_match, + .probe = faux_probe, + .remove = faux_remove, +}; + +static struct device_driver faux_driver = { + .name = "faux_driver", + .bus = &faux_bus_type, + .probe_type = PROBE_FORCE_SYNCHRONOUS, +}; + +static void faux_device_release(struct device *dev) +{ + struct faux_object *faux_obj = to_faux_object(dev); + + kfree(faux_obj); +} + +/** + * faux_device_create_with_groups - Create and register with the driver + * core a faux device and populate the device with an initial + * set of sysfs attributes. + * @name: The name of the device we are adding, must be unique for + * all faux devices. + * @parent: Pointer to a potential parent struct device. If set to + * NULL, the device will be created in the "root" of the faux + * device tree in sysfs. + * @faux_ops: struct faux_device_ops that the new device will call back + * into, can be NULL. + * @groups: The set of sysfs attributes that will be created for this + * device when it is registered with the driver core. + * + * Create a new faux device and register it in the driver core properly. + * If present, callbacks in @faux_ops will be called with the device that + * for the caller to do something with at the proper time given the + * device's lifecycle. + * + * Note, when this function is called, the functions specified in struct + * faux_ops can be called before the function returns, so be prepared for + * everything to be properly initialized before that point in time. + * + * Return: + * * NULL if an error happened with creating the device + * * pointer to a valid struct faux_device that is registered with sysfs + */ +struct faux_device *faux_device_create_with_groups(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops, + const struct attribute_group **groups) +{ + struct faux_object *faux_obj; + struct faux_device *faux_dev; + struct device *dev; + int ret; + + faux_obj = kzalloc(sizeof(*faux_obj), GFP_KERNEL); + if (!faux_obj) + return NULL; + + /* Save off the callbacks so we can use them in the future */ + faux_obj->faux_ops = faux_ops; + + /* Initialize the device portion and register it with the driver core */ + faux_dev = &faux_obj->faux_dev; + dev = &faux_dev->dev; + + device_initialize(dev); + dev->release = faux_device_release; + if (parent) + dev->parent = parent; + else + dev->parent = &faux_bus_root; + dev->bus = &faux_bus_type; + dev->groups = groups; + dev_set_name(dev, "%s", name); + + ret = device_add(dev); + if (ret) { + pr_err("%s: device_add for faux device '%s' failed with %d\n", + __func__, name, ret); + put_device(dev); + return NULL; + } + + return faux_dev; +} +EXPORT_SYMBOL_GPL(faux_device_create_with_groups); + +/** + * faux_device_create - create and register with the driver core a faux device + * @name: The name of the device we are adding, must be unique for all + * faux devices. + * @parent: Pointer to a potential parent struct device. If set to + * NULL, the device will be created in the "root" of the faux + * device tree in sysfs. + * @faux_ops: struct faux_device_ops that the new device will call back + * into, can be NULL. + * + * Create a new faux device and register it in the driver core properly. + * If present, callbacks in @faux_ops will be called with the device that + * for the caller to do something with at the proper time given the + * device's lifecycle. + * + * Note, when this function is called, the functions specified in struct + * faux_ops can be called before the function returns, so be prepared for + * everything to be properly initialized before that point in time. + * + * Return: + * * NULL if an error happened with creating the device + * * pointer to a valid struct faux_device that is registered with sysfs + */ +struct faux_device *faux_device_create(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops) +{ + return faux_device_create_with_groups(name, parent, faux_ops, NULL); +} +EXPORT_SYMBOL_GPL(faux_device_create); + +/** + * faux_device_destroy - destroy a faux device + * @faux_dev: faux device to destroy + * + * Unregisters and cleans up a device that was created with a call to + * faux_device_create() + */ +void faux_device_destroy(struct faux_device *faux_dev) +{ + struct device *dev = &faux_dev->dev; + + if (!faux_dev) + return; + + device_del(dev); + + /* The final put_device() will clean up the memory we allocated for this device. */ + put_device(dev); +} +EXPORT_SYMBOL_GPL(faux_device_destroy); + +int __init faux_bus_init(void) +{ + int ret; + + ret = device_register(&faux_bus_root); + if (ret) { + put_device(&faux_bus_root); + return ret; + } + + ret = bus_register(&faux_bus_type); + if (ret) + goto error_bus; + + ret = driver_register(&faux_driver); + if (ret) + goto error_driver; + + return ret; + +error_driver: + bus_unregister(&faux_bus_type); + +error_bus: + device_unregister(&faux_bus_root); + return ret; +} diff --git a/drivers/base/init.c b/drivers/base/init.c index c4954835128c..9d2b06d65dfc 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -32,6 +32,7 @@ void __init driver_init(void) /* These are also core pieces, but must come after the * core core pieces. */ + faux_bus_init(); of_core_init(); platform_bus_init(); auxiliary_bus_init(); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index d497d448e4b2..40e1d8d8a589 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1191,24 +1191,18 @@ static pm_message_t resume_event(pm_message_t sleep_state) return PMSG_ON; } -static void dpm_superior_set_must_resume(struct device *dev, bool set_active) +static void dpm_superior_set_must_resume(struct device *dev) { struct device_link *link; int idx; - if (dev->parent) { + if (dev->parent) dev->parent->power.must_resume = true; - if (set_active) - dev->parent->power.set_active = true; - } idx = device_links_read_lock(); - list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) { + list_for_each_entry_rcu_locked(link, &dev->links.suppliers, c_node) link->supplier->power.must_resume = true; - if (set_active) - link->supplier->power.set_active = true; - } device_links_read_unlock(idx); } @@ -1287,9 +1281,12 @@ Skip: dev->power.must_resume = true; if (dev->power.must_resume) { - dev->power.set_active = dev->power.set_active || - dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND); - dpm_superior_set_must_resume(dev, dev->power.set_active); + if (dev_pm_test_driver_flags(dev, DPM_FLAG_SMART_SUSPEND)) { + dev->power.set_active = true; + if (dev->parent && !dev->parent->power.ignore_children) + dev->parent->power.set_active = true; + } + dpm_superior_set_must_resume(dev); } Complete: diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 0bcd81389a29..978613407ea3 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -906,6 +906,7 @@ err_alloc: kfree(d->wake_buf); kfree(d->mask_buf_def); kfree(d->mask_buf); + kfree(d->main_status_buf); kfree(d->status_buf); kfree(d->status_reg_buf); if (d->config_buf) { @@ -981,6 +982,7 @@ void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *d) kfree(d->wake_buf); kfree(d->mask_buf_def); kfree(d->mask_buf); + kfree(d->main_status_buf); kfree(d->status_reg_buf); kfree(d->status_buf); if (d->config_buf) { diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c index 33b3bc99d532..282f81616a78 100644 --- a/drivers/block/sunvdc.c +++ b/drivers/block/sunvdc.c @@ -1127,8 +1127,8 @@ static void vdc_queue_drain(struct vdc_port *port) spin_lock_irq(&port->vio.lock); port->drain = 0; - blk_mq_unquiesce_queue(q, memflags); - blk_mq_unfreeze_queue(q); + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q, memflags); } static void vdc_ldc_reset_timer_work(struct work_struct *work) diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 2b79952f3628..091ffe3e1495 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1320,6 +1320,10 @@ static int btintel_pcie_send_frame(struct hci_dev *hdev, if (opcode == 0xfc01) btintel_pcie_inject_cmd_complete(hdev, opcode); } + /* Firmware raises alive interrupt on HCI_OP_RESET */ + if (opcode == HCI_OP_RESET) + data->gp0_received = false; + hdev->stat.cmd_tx++; break; case HCI_ACLDATA_PKT: @@ -1357,7 +1361,6 @@ static int btintel_pcie_send_frame(struct hci_dev *hdev, opcode, btintel_pcie_alivectxt_state2str(old_ctxt), btintel_pcie_alivectxt_state2str(data->alive_intr_ctxt)); if (opcode == HCI_OP_RESET) { - data->gp0_received = false; ret = wait_event_timeout(data->gp0_wait_q, data->gp0_received, msecs_to_jiffies(BTINTEL_DEFAULT_INTR_TIMEOUT_MS)); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 90966dfbd278..8149e53fd0a7 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2102,7 +2102,8 @@ static int btusb_send_frame(struct hci_dev *hdev, struct sk_buff *skb) return submit_or_queue_tx_urb(hdev, urb); case HCI_SCODATA_PKT: - if (hci_conn_num(hdev, SCO_LINK) < 1) + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hci_conn_num(hdev, SCO_LINK) < 1) return -ENODEV; urb = alloc_isoc_urb(hdev, skb); @@ -2576,7 +2577,8 @@ static int btusb_send_frame_intel(struct hci_dev *hdev, struct sk_buff *skb) return submit_or_queue_tx_urb(hdev, urb); case HCI_SCODATA_PKT: - if (hci_conn_num(hdev, SCO_LINK) < 1) + if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + hci_conn_num(hdev, SCO_LINK) < 1) return -ENODEV; urb = alloc_isoc_urb(hdev, skb); diff --git a/drivers/bus/moxtet.c b/drivers/bus/moxtet.c index 6276551d7968..1e57ebfb7622 100644 --- a/drivers/bus/moxtet.c +++ b/drivers/bus/moxtet.c @@ -657,7 +657,7 @@ static void moxtet_irq_print_chip(struct irq_data *d, struct seq_file *p) id = moxtet->modules[pos->idx]; - seq_printf(p, " moxtet-%s.%i#%i", mox_module_name(id), pos->idx, + seq_printf(p, "moxtet-%s.%i#%i", mox_module_name(id), pos->idx, pos->bit); } diff --git a/drivers/clocksource/jcore-pit.c b/drivers/clocksource/jcore-pit.c index a3fe98cd3838..82815428f8f9 100644 --- a/drivers/clocksource/jcore-pit.c +++ b/drivers/clocksource/jcore-pit.c @@ -114,6 +114,18 @@ static int jcore_pit_local_init(unsigned cpu) pit->periodic_delta = DIV_ROUND_CLOSEST(NSEC_PER_SEC, HZ * buspd); clockevents_config_and_register(&pit->ced, freq, 1, ULONG_MAX); + enable_percpu_irq(pit->ced.irq, IRQ_TYPE_NONE); + + return 0; +} + +static int jcore_pit_local_teardown(unsigned cpu) +{ + struct jcore_pit *pit = this_cpu_ptr(jcore_pit_percpu); + + pr_info("Local J-Core PIT teardown on cpu %u\n", cpu); + + disable_percpu_irq(pit->ced.irq); return 0; } @@ -168,6 +180,7 @@ static int __init jcore_pit_init(struct device_node *node) return -ENOMEM; } + irq_set_percpu_devid(pit_irq); err = request_percpu_irq(pit_irq, jcore_timer_interrupt, "jcore_pit", jcore_pit_percpu); if (err) { @@ -237,7 +250,7 @@ static int __init jcore_pit_init(struct device_node *node) cpuhp_setup_state(CPUHP_AP_JCORE_TIMER_STARTING, "clockevents/jcore:starting", - jcore_pit_local_init, NULL); + jcore_pit_local_init, jcore_pit_local_teardown); return 0; } diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index 0ee5c691fb36..9e46960f6a86 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -17,7 +17,8 @@ config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM config ARM_AIROHA_SOC_CPUFREQ tristate "Airoha EN7581 SoC CPUFreq support" - depends on (ARCH_AIROHA && OF) || COMPILE_TEST + depends on ARCH_AIROHA || COMPILE_TEST + depends on OF select PM_OPP default ARCH_AIROHA help diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index dd9b8d6993d6..313550fa62d4 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -699,7 +699,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (min_perf < lowest_nonlinear_perf) min_perf = lowest_nonlinear_perf; - max_perf = cap_perf; + max_perf = cpudata->max_limit_perf; if (max_perf < min_perf) max_perf = min_perf; @@ -747,7 +747,6 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) guard(mutex)(&amd_pstate_driver_lock); ret = amd_pstate_cpu_boost_update(policy, state); - policy->boost_enabled = !ret ? state : false; refresh_frequency_limits(policy); return ret; @@ -822,25 +821,28 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) static void amd_pstate_update_limits(unsigned int cpu) { - struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct cpufreq_policy *policy = NULL; struct amd_cpudata *cpudata; u32 prev_high = 0, cur_high = 0; int ret; bool highest_perf_changed = false; + if (!amd_pstate_prefcore) + return; + + policy = cpufreq_cpu_get(cpu); if (!policy) return; cpudata = policy->driver_data; - if (!amd_pstate_prefcore) - return; - guard(mutex)(&amd_pstate_driver_lock); ret = amd_get_highest_perf(cpu, &cur_high); - if (ret) - goto free_cpufreq_put; + if (ret) { + cpufreq_cpu_put(policy); + return; + } prev_high = READ_ONCE(cpudata->prefcore_ranking); highest_perf_changed = (prev_high != cur_high); @@ -850,8 +852,6 @@ static void amd_pstate_update_limits(unsigned int cpu) if (cur_high < CPPC_MAX_PERF) sched_set_itmt_core_prio((int)cur_high, cpu); } - -free_cpufreq_put: cpufreq_cpu_put(policy); if (!highest_perf_changed) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index e0048856ecee..30ffbddc7ece 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1571,7 +1571,8 @@ static int cpufreq_online(unsigned int cpu) policy->cdev = of_cpufreq_cooling_register(policy); /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */ - if (policy->boost_enabled != cpufreq_boost_enabled()) { + if (cpufreq_driver->set_boost && + policy->boost_enabled != cpufreq_boost_enabled()) { policy->boost_enabled = cpufreq_boost_enabled(); ret = cpufreq_driver->set_boost(policy, policy->boost_enabled); if (ret) { diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c index 7eb3e4668286..3467f6db4f50 100644 --- a/drivers/crypto/ccp/sp-dev.c +++ b/drivers/crypto/ccp/sp-dev.c @@ -19,6 +19,7 @@ #include <linux/types.h> #include <linux/ccp.h> +#include "sev-dev.h" #include "ccp-dev.h" #include "sp-dev.h" @@ -253,8 +254,12 @@ unlock: static int __init sp_mod_init(void) { #ifdef CONFIG_X86 + static bool initialized; int ret; + if (initialized) + return 0; + ret = sp_pci_init(); if (ret) return ret; @@ -263,6 +268,8 @@ static int __init sp_mod_init(void) psp_pci_init(); #endif + initialized = true; + return 0; #endif @@ -279,6 +286,13 @@ static int __init sp_mod_init(void) return -ENODEV; } +#if IS_BUILTIN(CONFIG_KVM_AMD) && IS_ENABLED(CONFIG_KVM_AMD_SEV) +int __init sev_module_init(void) +{ + return sp_mod_init(); +} +#endif + static void __exit sp_mod_exit(void) { #ifdef CONFIG_X86 diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 6896da8ac7ef..5c6a5b358987 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -887,7 +887,7 @@ static int tegra_adma_probe(struct platform_device *pdev) const struct tegra_adma_chip_data *cdata; struct tegra_adma *tdma; struct resource *res_page, *res_base; - int ret, i, page_no; + int ret, i; cdata = of_device_get_match_data(&pdev->dev); if (!cdata) { @@ -914,9 +914,20 @@ static int tegra_adma_probe(struct platform_device *pdev) res_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "global"); if (res_base) { - page_no = (res_page->start - res_base->start) / cdata->ch_base_offset; - if (page_no <= 0) + resource_size_t page_offset, page_no; + unsigned int ch_base_offset; + + if (res_page->start < res_base->start) + return -EINVAL; + page_offset = res_page->start - res_base->start; + ch_base_offset = cdata->ch_base_offset; + if (!ch_base_offset) return -EINVAL; + + page_no = div_u64(page_offset, ch_base_offset); + if (!page_no || page_no > INT_MAX) + return -EINVAL; + tdma->ch_page_no = page_no - 1; tdma->base_addr = devm_ioremap_resource(&pdev->dev, res_base); if (IS_ERR(tdma->base_addr)) diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c index 04c42c83a2ba..f3da9385ca0d 100644 --- a/drivers/edac/qcom_edac.c +++ b/drivers/edac/qcom_edac.c @@ -95,7 +95,7 @@ static int qcom_llcc_core_setup(struct llcc_drv_data *drv, struct regmap *llcc_b * Configure interrupt enable registers such that Tag, Data RAM related * interrupts are propagated to interrupt controller for servicing */ - ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_2_enable, + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_0_enable, TRP0_INTERRUPT_ENABLE, TRP0_INTERRUPT_ENABLE); if (ret) @@ -113,7 +113,7 @@ static int qcom_llcc_core_setup(struct llcc_drv_data *drv, struct regmap *llcc_b if (ret) return ret; - ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_2_enable, + ret = regmap_update_bits(llcc_bcast_regmap, drv->edac_reg_offset->cmn_interrupt_0_enable, DRP0_INTERRUPT_ENABLE, DRP0_INTERRUPT_ENABLE); if (ret) diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 71d8b26c4103..9f35f69e0f9e 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -106,7 +106,7 @@ config ISCSI_IBFT select ISCSI_BOOT_SYSFS select ISCSI_IBFT_FIND if X86 depends on ACPI && SCSI && SCSI_LOWLEVEL - default n + default n help This option enables support for detection and exposing of iSCSI Boot Firmware Table (iBFT) via sysfs to userspace. If you wish to diff --git a/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c b/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c index 83b69fc4fba5..a8915d3b4df5 100644 --- a/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c +++ b/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c @@ -254,8 +254,8 @@ static int scmi_imx_misc_ctrl_set(const struct scmi_protocol_handle *ph, if (num > max_num) return -EINVAL; - ret = ph->xops->xfer_get_init(ph, SCMI_IMX_MISC_CTRL_SET, sizeof(*in), - 0, &t); + ret = ph->xops->xfer_get_init(ph, SCMI_IMX_MISC_CTRL_SET, + sizeof(*in) + num * sizeof(__le32), 0, &t); if (ret) return ret; diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index 5365e9a43000..42433c19eb30 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -1609,8 +1609,8 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, goto out_fw; } - ret = regmap_raw_write_async(regmap, reg, buf->buf, - le32_to_cpu(region->len)); + ret = regmap_raw_write(regmap, reg, buf->buf, + le32_to_cpu(region->len)); if (ret != 0) { cs_dsp_err(dsp, "%s.%d: Failed to write %d bytes at %d in %s: %d\n", @@ -1625,12 +1625,6 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, regions++; } - ret = regmap_async_complete(regmap); - if (ret != 0) { - cs_dsp_err(dsp, "Failed to complete async write: %d\n", ret); - goto out_fw; - } - if (pos > firmware->size) cs_dsp_warn(dsp, "%s.%d: %zu bytes at end of file\n", file, regions, pos - firmware->size); @@ -1638,7 +1632,6 @@ static int cs_dsp_load(struct cs_dsp *dsp, const struct firmware *firmware, cs_dsp_debugfs_save_wmfwname(dsp, file); out_fw: - regmap_async_complete(regmap); cs_dsp_buf_free(&buf_list); if (ret == -EOVERFLOW) @@ -2326,8 +2319,8 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware cs_dsp_dbg(dsp, "%s.%d: Writing %d bytes at %x\n", file, blocks, le32_to_cpu(blk->len), reg); - ret = regmap_raw_write_async(regmap, reg, buf->buf, - le32_to_cpu(blk->len)); + ret = regmap_raw_write(regmap, reg, buf->buf, + le32_to_cpu(blk->len)); if (ret != 0) { cs_dsp_err(dsp, "%s.%d: Failed to write to %x in %s: %d\n", @@ -2339,10 +2332,6 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware blocks++; } - ret = regmap_async_complete(regmap); - if (ret != 0) - cs_dsp_err(dsp, "Failed to complete async write: %d\n", ret); - if (pos > firmware->size) cs_dsp_warn(dsp, "%s.%d: %zu bytes at end of file\n", file, blocks, pos - firmware->size); @@ -2350,7 +2339,6 @@ static int cs_dsp_load_coeff(struct cs_dsp *dsp, const struct firmware *firmware cs_dsp_debugfs_save_binname(dsp, file); out_fw: - regmap_async_complete(regmap); cs_dsp_buf_free(&buf_list); if (ret == -EOVERFLOW) @@ -2561,8 +2549,8 @@ static int cs_dsp_adsp2_enable_core(struct cs_dsp *dsp) { int ret; - ret = regmap_update_bits_async(dsp->regmap, dsp->base + ADSP2_CONTROL, - ADSP2_SYS_ENA, ADSP2_SYS_ENA); + ret = regmap_update_bits(dsp->regmap, dsp->base + ADSP2_CONTROL, + ADSP2_SYS_ENA, ADSP2_SYS_ENA); if (ret != 0) return ret; diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c index fa9c1c3bf168..f0a63d09d3c4 100644 --- a/drivers/firmware/efi/cper-arm.c +++ b/drivers/firmware/efi/cper-arm.c @@ -311,7 +311,7 @@ void cper_print_proc_arm(const char *pfx, ctx_info = (struct cper_arm_ctx_info *)err_info; max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1; for (i = 0; i < proc->context_info_num; i++) { - int size = sizeof(*ctx_info) + ctx_info->size; + int size = ALIGN(sizeof(*ctx_info) + ctx_info->size, 16); printk("%sContext info structure %d:\n", pfx, i); if (len < size) { diff --git a/drivers/firmware/efi/cper-x86.c b/drivers/firmware/efi/cper-x86.c index 438ed9eff6d0..3949d7b5e808 100644 --- a/drivers/firmware/efi/cper-x86.c +++ b/drivers/firmware/efi/cper-x86.c @@ -325,7 +325,7 @@ void cper_print_proc_ia(const char *pfx, const struct cper_sec_proc_ia *proc) ctx_info = (struct cper_ia_proc_ctx *)err_info; for (i = 0; i < VALID_PROC_CXT_INFO_NUM(proc->validation_bits); i++) { - int size = sizeof(*ctx_info) + ctx_info->reg_arr_size; + int size = ALIGN(sizeof(*ctx_info) + ctx_info->reg_arr_size, 16); int groupsize = 4; printk("%sContext Information Structure %d:\n", pfx, i); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 8296bf985d1d..7309394b8fc9 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -934,13 +934,15 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, EFI_MEMORY_WB | EFI_MEMORY_UCE | EFI_MEMORY_RO | EFI_MEMORY_WP | EFI_MEMORY_RP | EFI_MEMORY_XP | EFI_MEMORY_NV | EFI_MEMORY_SP | EFI_MEMORY_CPU_CRYPTO | - EFI_MEMORY_RUNTIME | EFI_MEMORY_MORE_RELIABLE)) + EFI_MEMORY_MORE_RELIABLE | EFI_MEMORY_HOT_PLUGGABLE | + EFI_MEMORY_RUNTIME)) snprintf(pos, size, "|attr=0x%016llx]", (unsigned long long)attr); else snprintf(pos, size, - "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", + "|%3s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%2s|%3s|%2s|%2s|%2s|%2s]", attr & EFI_MEMORY_RUNTIME ? "RUN" : "", + attr & EFI_MEMORY_HOT_PLUGGABLE ? "HP" : "", attr & EFI_MEMORY_MORE_RELIABLE ? "MR" : "", attr & EFI_MEMORY_CPU_CRYPTO ? "CC" : "", attr & EFI_MEMORY_SP ? "SP" : "", diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c index e5872e38d9a4..5a732018be36 100644 --- a/drivers/firmware/efi/libstub/randomalloc.c +++ b/drivers/firmware/efi/libstub/randomalloc.c @@ -25,6 +25,9 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, if (md->type != EFI_CONVENTIONAL_MEMORY) return 0; + if (md->attribute & EFI_MEMORY_HOT_PLUGGABLE) + return 0; + if (efi_soft_reserve_enabled() && (md->attribute & EFI_MEMORY_SP)) return 0; diff --git a/drivers/firmware/efi/libstub/relocate.c b/drivers/firmware/efi/libstub/relocate.c index 99b45d1cd624..d4264bfb6dc1 100644 --- a/drivers/firmware/efi/libstub/relocate.c +++ b/drivers/firmware/efi/libstub/relocate.c @@ -53,6 +53,9 @@ efi_status_t efi_low_alloc_above(unsigned long size, unsigned long align, if (desc->type != EFI_CONVENTIONAL_MEMORY) continue; + if (desc->attribute & EFI_MEMORY_HOT_PLUGGABLE) + continue; + if (efi_soft_reserve_enabled() && (desc->attribute & EFI_MEMORY_SP)) continue; diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index 5ed0602c2f75..208db29613c6 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c @@ -99,14 +99,13 @@ static struct kobject *mokvar_kobj; */ void __init efi_mokvar_table_init(void) { + struct efi_mokvar_table_entry __aligned(1) *mokvar_entry, *next_entry; efi_memory_desc_t md; void *va = NULL; unsigned long cur_offset = 0; unsigned long offset_limit; - unsigned long map_size = 0; unsigned long map_size_needed = 0; unsigned long size; - struct efi_mokvar_table_entry *mokvar_entry; int err; if (!efi_enabled(EFI_MEMMAP)) @@ -134,48 +133,46 @@ void __init efi_mokvar_table_init(void) */ err = -EINVAL; while (cur_offset + sizeof(*mokvar_entry) <= offset_limit) { - mokvar_entry = va + cur_offset; - map_size_needed = cur_offset + sizeof(*mokvar_entry); - if (map_size_needed > map_size) { - if (va) - early_memunmap(va, map_size); - /* - * Map a little more than the fixed size entry - * header, anticipating some data. It's safe to - * do so as long as we stay within current memory - * descriptor. - */ - map_size = min(map_size_needed + 2*EFI_PAGE_SIZE, - offset_limit); - va = early_memremap(efi.mokvar_table, map_size); - if (!va) { - pr_err("Failed to map EFI MOKvar config table pa=0x%lx, size=%lu.\n", - efi.mokvar_table, map_size); - return; - } - mokvar_entry = va + cur_offset; + if (va) + early_memunmap(va, sizeof(*mokvar_entry)); + va = early_memremap(efi.mokvar_table + cur_offset, sizeof(*mokvar_entry)); + if (!va) { + pr_err("Failed to map EFI MOKvar config table pa=0x%lx, size=%zu.\n", + efi.mokvar_table + cur_offset, sizeof(*mokvar_entry)); + return; } - + mokvar_entry = va; +next: /* Check for last sentinel entry */ if (mokvar_entry->name[0] == '\0') { if (mokvar_entry->data_size != 0) break; err = 0; + map_size_needed = cur_offset + sizeof(*mokvar_entry); break; } - /* Sanity check that the name is null terminated */ - size = strnlen(mokvar_entry->name, - sizeof(mokvar_entry->name)); - if (size >= sizeof(mokvar_entry->name)) - break; + /* Enforce that the name is NUL terminated */ + mokvar_entry->name[sizeof(mokvar_entry->name) - 1] = '\0'; /* Advance to the next entry */ - cur_offset = map_size_needed + mokvar_entry->data_size; + size = sizeof(*mokvar_entry) + mokvar_entry->data_size; + cur_offset += size; + + /* + * Don't bother remapping if the current entry header and the + * next one end on the same page. + */ + next_entry = (void *)((unsigned long)mokvar_entry + size); + if (((((unsigned long)(mokvar_entry + 1) - 1) ^ + ((unsigned long)(next_entry + 1) - 1)) & PAGE_MASK) == 0) { + mokvar_entry = next_entry; + goto next; + } } if (va) - early_memunmap(va, map_size); + early_memunmap(va, sizeof(*mokvar_entry)); if (err) { pr_err("EFI MOKvar config table is not valid\n"); return; diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig index 907cd149c40a..c964f4924359 100644 --- a/drivers/firmware/imx/Kconfig +++ b/drivers/firmware/imx/Kconfig @@ -25,6 +25,7 @@ config IMX_SCU config IMX_SCMI_MISC_DRV tristate "IMX SCMI MISC Protocol driver" + depends on ARCH_MXC || COMPILE_TEST default y if ARCH_MXC help The System Controller Management Interface firmware (SCMI FW) is diff --git a/drivers/firmware/iscsi_ibft.c b/drivers/firmware/iscsi_ibft.c index 6e9788324fea..371f24569b3b 100644 --- a/drivers/firmware/iscsi_ibft.c +++ b/drivers/firmware/iscsi_ibft.c @@ -310,7 +310,10 @@ static ssize_t ibft_attr_show_nic(void *data, int type, char *buf) str += sprintf_ipaddr(str, nic->ip_addr); break; case ISCSI_BOOT_ETH_SUBNET_MASK: - val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1)); + if (nic->subnet_mask_prefix > 32) + val = cpu_to_be32(~0); + else + val = cpu_to_be32(~((1 << (32-nic->subnet_mask_prefix))-1)); str += sprintf(str, "%pI4", &val); break; case ISCSI_BOOT_ETH_PREFIX_LEN: diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index add5ad29a673..98b4d1633b25 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -338,6 +338,7 @@ config GPIO_GRANITERAPIDS config GPIO_GRGPIO tristate "Aeroflex Gaisler GRGPIO support" + depends on OF || COMPILE_TEST select GPIO_GENERIC select IRQ_DOMAIN help diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c index 5321ef98f442..64908f1a5e7f 100644 --- a/drivers/gpio/gpio-bcm-kona.c +++ b/drivers/gpio/gpio-bcm-kona.c @@ -69,6 +69,22 @@ struct bcm_kona_gpio { struct bcm_kona_gpio_bank { int id; int irq; + /* + * Used to keep track of lock/unlock operations for each GPIO in the + * bank. + * + * All GPIOs are locked by default (see bcm_kona_gpio_reset), and the + * unlock count for all GPIOs is 0 by default. Each unlock increments + * the counter, and each lock decrements the counter. + * + * The lock function only locks the GPIO once its unlock counter is + * down to 0. This is necessary because the GPIO is unlocked in two + * places in this driver: once for requested GPIOs, and once for + * requested IRQs. Since it is possible for a GPIO to be requested + * as both a GPIO and an IRQ, we need to ensure that we don't lock it + * too early. + */ + u8 gpio_unlock_count[GPIO_PER_BANK]; /* Used in the interrupt handler */ struct bcm_kona_gpio *kona_gpio; }; @@ -86,14 +102,24 @@ static void bcm_kona_gpio_lock_gpio(struct bcm_kona_gpio *kona_gpio, u32 val; unsigned long flags; int bank_id = GPIO_BANK(gpio); + int bit = GPIO_BIT(gpio); + struct bcm_kona_gpio_bank *bank = &kona_gpio->banks[bank_id]; - raw_spin_lock_irqsave(&kona_gpio->lock, flags); + if (bank->gpio_unlock_count[bit] == 0) { + dev_err(kona_gpio->gpio_chip.parent, + "Unbalanced locks for GPIO %u\n", gpio); + return; + } - val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); - val |= BIT(gpio); - bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); + if (--bank->gpio_unlock_count[bit] == 0) { + raw_spin_lock_irqsave(&kona_gpio->lock, flags); - raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); + val |= BIT(bit); + bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); + + raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + } } static void bcm_kona_gpio_unlock_gpio(struct bcm_kona_gpio *kona_gpio, @@ -102,14 +128,20 @@ static void bcm_kona_gpio_unlock_gpio(struct bcm_kona_gpio *kona_gpio, u32 val; unsigned long flags; int bank_id = GPIO_BANK(gpio); + int bit = GPIO_BIT(gpio); + struct bcm_kona_gpio_bank *bank = &kona_gpio->banks[bank_id]; - raw_spin_lock_irqsave(&kona_gpio->lock, flags); + if (bank->gpio_unlock_count[bit] == 0) { + raw_spin_lock_irqsave(&kona_gpio->lock, flags); - val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); - val &= ~BIT(gpio); - bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); + val = readl(kona_gpio->reg_base + GPIO_PWD_STATUS(bank_id)); + val &= ~BIT(bit); + bcm_kona_gpio_write_lock_regs(kona_gpio->reg_base, bank_id, val); - raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + raw_spin_unlock_irqrestore(&kona_gpio->lock, flags); + } + + ++bank->gpio_unlock_count[bit]; } static int bcm_kona_gpio_get_dir(struct gpio_chip *chip, unsigned gpio) @@ -360,6 +392,7 @@ static void bcm_kona_gpio_irq_mask(struct irq_data *d) kona_gpio = irq_data_get_irq_chip_data(d); reg_base = kona_gpio->reg_base; + raw_spin_lock_irqsave(&kona_gpio->lock, flags); val = readl(reg_base + GPIO_INT_MASK(bank_id)); @@ -382,6 +415,7 @@ static void bcm_kona_gpio_irq_unmask(struct irq_data *d) kona_gpio = irq_data_get_irq_chip_data(d); reg_base = kona_gpio->reg_base; + raw_spin_lock_irqsave(&kona_gpio->lock, flags); val = readl(reg_base + GPIO_INT_MSKCLR(bank_id)); @@ -477,15 +511,26 @@ static void bcm_kona_gpio_irq_handler(struct irq_desc *desc) static int bcm_kona_gpio_irq_reqres(struct irq_data *d) { struct bcm_kona_gpio *kona_gpio = irq_data_get_irq_chip_data(d); + unsigned int gpio = d->hwirq; - return gpiochip_reqres_irq(&kona_gpio->gpio_chip, d->hwirq); + /* + * We need to unlock the GPIO before any other operations are performed + * on the relevant GPIO configuration registers + */ + bcm_kona_gpio_unlock_gpio(kona_gpio, gpio); + + return gpiochip_reqres_irq(&kona_gpio->gpio_chip, gpio); } static void bcm_kona_gpio_irq_relres(struct irq_data *d) { struct bcm_kona_gpio *kona_gpio = irq_data_get_irq_chip_data(d); + unsigned int gpio = d->hwirq; + + /* Once we no longer use it, lock the GPIO again */ + bcm_kona_gpio_lock_gpio(kona_gpio, gpio); - gpiochip_relres_irq(&kona_gpio->gpio_chip, d->hwirq); + gpiochip_relres_irq(&kona_gpio->gpio_chip, gpio); } static struct irq_chip bcm_gpio_irq_chip = { @@ -614,7 +659,7 @@ static int bcm_kona_gpio_probe(struct platform_device *pdev) bank->irq = platform_get_irq(pdev, i); bank->kona_gpio = kona_gpio; if (bank->irq < 0) { - dev_err(dev, "Couldn't get IRQ for bank %d", i); + dev_err(dev, "Couldn't get IRQ for bank %d\n", i); ret = -ENOENT; goto err_irq_domain; } diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index be4c9981ebc4..d63c1030e6ac 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -841,25 +841,6 @@ static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pendin DECLARE_BITMAP(trigger, MAX_LINE); int ret; - if (chip->driver_data & PCA_PCAL) { - /* Read the current interrupt status from the device */ - ret = pca953x_read_regs(chip, PCAL953X_INT_STAT, trigger); - if (ret) - return false; - - /* Check latched inputs and clear interrupt status */ - ret = pca953x_read_regs(chip, chip->regs->input, cur_stat); - if (ret) - return false; - - /* Apply filter for rising/falling edge selection */ - bitmap_replace(new_stat, chip->irq_trig_fall, chip->irq_trig_raise, cur_stat, gc->ngpio); - - bitmap_and(pending, new_stat, trigger, gc->ngpio); - - return !bitmap_empty(pending, gc->ngpio); - } - ret = pca953x_read_regs(chip, chip->regs->input, cur_stat); if (ret) return false; diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index a086087ada17..b6c230fab840 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -1028,20 +1028,23 @@ gpio_sim_device_lockup_configfs(struct gpio_sim_device *dev, bool lock) struct configfs_subsystem *subsys = dev->group.cg_subsys; struct gpio_sim_bank *bank; struct gpio_sim_line *line; + struct config_item *item; /* - * The device only needs to depend on leaf line entries. This is + * The device only needs to depend on leaf entries. This is * sufficient to lock up all the configfs entries that the * instantiated, alive device depends on. */ list_for_each_entry(bank, &dev->bank_list, siblings) { list_for_each_entry(line, &bank->line_list, siblings) { + item = line->hog ? &line->hog->item + : &line->group.cg_item; + if (lock) - WARN_ON(configfs_depend_item_unlocked( - subsys, &line->group.cg_item)); + WARN_ON(configfs_depend_item_unlocked(subsys, + item)); else - configfs_undepend_item_unlocked( - &line->group.cg_item); + configfs_undepend_item_unlocked(item); } } } diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index 75a3633ceddb..222279a9d82b 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -191,7 +191,7 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) [REG_IE][CSB] = STMPE_IDX_IEGPIOR_CSB, [REG_IE][MSB] = STMPE_IDX_IEGPIOR_MSB, }; - int i, j; + int ret, i, j; /* * STMPE1600: to be able to get IRQ from pins, @@ -199,8 +199,16 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) * GPSR or GPCR registers */ if (stmpe->partnum == STMPE1600) { - stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_LSB]); - stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_CSB]); + ret = stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_LSB]); + if (ret < 0) { + dev_err(stmpe->dev, "Failed to read GPMR_LSB: %d\n", ret); + goto err; + } + ret = stmpe_reg_read(stmpe, stmpe->regs[STMPE_IDX_GPMR_CSB]); + if (ret < 0) { + dev_err(stmpe->dev, "Failed to read GPMR_CSB: %d\n", ret); + goto err; + } } for (i = 0; i < CACHE_NR_REGS; i++) { @@ -222,6 +230,7 @@ static void stmpe_gpio_irq_sync_unlock(struct irq_data *d) } } +err: mutex_unlock(&stmpe_gpio->irq_lock); } diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index c4f34a347cb6..c36a9dbccd4d 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -36,6 +36,7 @@ struct vf610_gpio_port { struct clk *clk_port; struct clk *clk_gpio; int irq; + spinlock_t lock; /* protect gpio direction registers */ }; #define GPIO_PDOR 0x00 @@ -124,6 +125,7 @@ static int vf610_gpio_direction_input(struct gpio_chip *chip, unsigned int gpio) u32 val; if (port->sdata->have_paddr) { + guard(spinlock_irqsave)(&port->lock); val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR); val &= ~mask; vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); @@ -142,6 +144,7 @@ static int vf610_gpio_direction_output(struct gpio_chip *chip, unsigned int gpio vf610_gpio_set(chip, gpio, value); if (port->sdata->have_paddr) { + guard(spinlock_irqsave)(&port->lock); val = vf610_gpio_readl(port->gpio_base + GPIO_PDDR); val |= mask; vf610_gpio_writel(val, port->gpio_base + GPIO_PDDR); @@ -297,6 +300,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) return -ENOMEM; port->sdata = device_get_match_data(dev); + spin_lock_init(&port->lock); dual_base = port->sdata->have_dual_base; diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 1f9fe50bba00..f7746c57ba76 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1689,6 +1689,20 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_wake = "PNP0C50:00@8", }, }, + { + /* + * Spurious wakeups from GPIO 11 + * Found in BIOS 1.04 + * https://gitlab.freedesktop.org/drm/amd/-/issues/3954 + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "Acer Nitro V 14"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_interrupt = "AMDI0030:00@11", + }, + }, {} /* Terminating entry */ }; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 679ed764cb14..8741600af7ef 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -904,13 +904,13 @@ int gpiochip_get_ngpios(struct gpio_chip *gc, struct device *dev) } if (gc->ngpio == 0) { - chip_err(gc, "tried to insert a GPIO chip with zero lines\n"); + dev_err(dev, "tried to insert a GPIO chip with zero lines\n"); return -EINVAL; } if (gc->ngpio > FASTPATH_NGPIO) - chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n", - gc->ngpio, FASTPATH_NGPIO); + dev_warn(dev, "line cnt %u is greater than fast path cnt %u\n", + gc->ngpio, FASTPATH_NGPIO); return 0; } @@ -1057,8 +1057,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, desc->gdev = gdev; if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index)) { - assign_bit(FLAG_IS_OUT, - &desc->flags, !gc->get_direction(gc, desc_index)); + ret = gc->get_direction(gc, desc_index); + if (ret < 0) + /* + * FIXME: Bail-out here once all GPIO drivers + * are updated to not return errors in + * situations that can be considered normal + * operation. + */ + dev_warn(&gdev->dev, + "%s: get_direction failed: %d\n", + __func__, ret); + + assign_bit(FLAG_IS_OUT, &desc->flags, !ret); } else { assign_bit(FLAG_IS_OUT, &desc->flags, !gc->direction_input); @@ -2701,7 +2712,7 @@ EXPORT_SYMBOL_GPL(gpiod_direction_input); int gpiod_direction_input_nonotify(struct gpio_desc *desc) { - int ret = 0; + int ret = 0, dir; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) @@ -2728,13 +2739,18 @@ int gpiod_direction_input_nonotify(struct gpio_desc *desc) if (guard.gc->direction_input) { ret = guard.gc->direction_input(guard.gc, gpio_chip_hwgpio(desc)); - } else if (guard.gc->get_direction && - (guard.gc->get_direction(guard.gc, - gpio_chip_hwgpio(desc)) != 1)) { - gpiod_warn(desc, - "%s: missing direction_input() operation and line is output\n", - __func__); - return -EIO; + } else if (guard.gc->get_direction) { + dir = guard.gc->get_direction(guard.gc, + gpio_chip_hwgpio(desc)); + if (dir < 0) + return dir; + + if (dir != GPIO_LINE_DIRECTION_IN) { + gpiod_warn(desc, + "%s: missing direction_input() operation and line is output\n", + __func__); + return -EIO; + } } if (ret == 0) { clear_bit(FLAG_IS_OUT, &desc->flags); @@ -2748,7 +2764,7 @@ int gpiod_direction_input_nonotify(struct gpio_desc *desc) static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) { - int val = !!value, ret = 0; + int val = !!value, ret = 0, dir; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) @@ -2771,12 +2787,18 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) gpio_chip_hwgpio(desc), val); } else { /* Check that we are in output mode if we can */ - if (guard.gc->get_direction && - guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc))) { - gpiod_warn(desc, - "%s: missing direction_output() operation\n", - __func__); - return -EIO; + if (guard.gc->get_direction) { + dir = guard.gc->get_direction(guard.gc, + gpio_chip_hwgpio(desc)); + if (dir < 0) + return dir; + + if (dir != GPIO_LINE_DIRECTION_OUT) { + gpiod_warn(desc, + "%s: missing direction_output() operation\n", + __func__); + return -EIO; + } } /* * If we can't actively set the direction, we are some @@ -3129,6 +3151,8 @@ static int gpiod_get_raw_value_commit(const struct gpio_desc *desc) static int gpio_chip_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { + lockdep_assert_held(&gc->gpiodev->srcu); + if (gc->get_multiple) return gc->get_multiple(gc, mask, bits); if (gc->get) { @@ -3159,6 +3183,7 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, struct gpio_array *array_info, unsigned long *value_bitmap) { + struct gpio_chip *gc; int ret, i = 0; /* @@ -3170,10 +3195,15 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep, array_size <= array_info->size && (void *)array_info == desc_array + array_info->size) { if (!can_sleep) - WARN_ON(array_info->chip->can_sleep); + WARN_ON(array_info->gdev->can_sleep); + + guard(srcu)(&array_info->gdev->srcu); + gc = srcu_dereference(array_info->gdev->chip, + &array_info->gdev->srcu); + if (!gc) + return -ENODEV; - ret = gpio_chip_get_multiple(array_info->chip, - array_info->get_mask, + ret = gpio_chip_get_multiple(gc, array_info->get_mask, value_bitmap); if (ret) return ret; @@ -3454,6 +3484,8 @@ static void gpiod_set_raw_value_commit(struct gpio_desc *desc, bool value) static void gpio_chip_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { + lockdep_assert_held(&gc->gpiodev->srcu); + if (gc->set_multiple) { gc->set_multiple(gc, mask, bits); } else { @@ -3471,6 +3503,7 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep, struct gpio_array *array_info, unsigned long *value_bitmap) { + struct gpio_chip *gc; int i = 0; /* @@ -3482,14 +3515,19 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep, array_size <= array_info->size && (void *)array_info == desc_array + array_info->size) { if (!can_sleep) - WARN_ON(array_info->chip->can_sleep); + WARN_ON(array_info->gdev->can_sleep); + + guard(srcu)(&array_info->gdev->srcu); + gc = srcu_dereference(array_info->gdev->chip, + &array_info->gdev->srcu); + if (!gc) + return -ENODEV; if (!raw && !bitmap_empty(array_info->invert_mask, array_size)) bitmap_xor(value_bitmap, value_bitmap, array_info->invert_mask, array_size); - gpio_chip_set_multiple(array_info->chip, array_info->set_mask, - value_bitmap); + gpio_chip_set_multiple(gc, array_info->set_mask, value_bitmap); i = find_first_zero_bit(array_info->set_mask, array_size); if (i == array_size) @@ -4751,9 +4789,10 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, { struct gpio_desc *desc; struct gpio_descs *descs; + struct gpio_device *gdev; struct gpio_array *array_info = NULL; - struct gpio_chip *gc; int count, bitmap_size; + unsigned long dflags; size_t descs_size; count = gpiod_count(dev, con_id); @@ -4774,7 +4813,7 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, descs->desc[descs->ndescs] = desc; - gc = gpiod_to_chip(desc); + gdev = gpiod_to_gpio_device(desc); /* * If pin hardware number of array member 0 is also 0, select * its chip as a candidate for fast bitmap processing path. @@ -4782,8 +4821,8 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, if (descs->ndescs == 0 && gpio_chip_hwgpio(desc) == 0) { struct gpio_descs *array; - bitmap_size = BITS_TO_LONGS(gc->ngpio > count ? - gc->ngpio : count); + bitmap_size = BITS_TO_LONGS(gdev->ngpio > count ? + gdev->ngpio : count); array = krealloc(descs, descs_size + struct_size(array_info, invert_mask, 3 * bitmap_size), @@ -4803,7 +4842,7 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, array_info->desc = descs->desc; array_info->size = count; - array_info->chip = gc; + array_info->gdev = gdev; bitmap_set(array_info->get_mask, descs->ndescs, count - descs->ndescs); bitmap_set(array_info->set_mask, descs->ndescs, @@ -4816,7 +4855,7 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, continue; /* Unmark array members which don't belong to the 'fast' chip */ - if (array_info->chip != gc) { + if (array_info->gdev != gdev) { __clear_bit(descs->ndescs, array_info->get_mask); __clear_bit(descs->ndescs, array_info->set_mask); } @@ -4839,9 +4878,10 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, array_info->set_mask); } } else { + dflags = READ_ONCE(desc->flags); /* Exclude open drain or open source from fast output */ - if (gpiochip_line_is_open_drain(gc, descs->ndescs) || - gpiochip_line_is_open_source(gc, descs->ndescs)) + if (test_bit(FLAG_OPEN_DRAIN, &dflags) || + test_bit(FLAG_OPEN_SOURCE, &dflags)) __clear_bit(descs->ndescs, array_info->set_mask); /* Identify 'fast' pins which require invertion */ @@ -4853,7 +4893,7 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, if (array_info) dev_dbg(dev, "GPIO array info: chip=%s, size=%d, get_mask=%lx, set_mask=%lx, invert_mask=%lx\n", - array_info->chip->label, array_info->size, + array_info->gdev->label, array_info->size, *array_info->get_mask, *array_info->set_mask, *array_info->invert_mask); return descs; diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 83690f72f7e5..147156ec502b 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -114,7 +114,7 @@ extern const char *const gpio_suffixes[]; * * @desc: Array of pointers to the GPIO descriptors * @size: Number of elements in desc - * @chip: Parent GPIO chip + * @gdev: Parent GPIO device * @get_mask: Get mask used in fastpath * @set_mask: Set mask used in fastpath * @invert_mask: Invert mask used in fastpath @@ -126,7 +126,7 @@ extern const char *const gpio_suffixes[]; struct gpio_array { struct gpio_desc **desc; unsigned int size; - struct gpio_chip *chip; + struct gpio_device *gdev; unsigned long *get_mask; unsigned long *set_mask; unsigned long invert_mask[]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d100bb7a137c..018dfccd771b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1638,6 +1638,13 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ + if ((amdgpu_runtime_pm != 0) && + adev->pdev->vendor == PCI_VENDOR_ID_ATI && + adev->pdev->device == 0x731f && + adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) + return 0; + /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) DRM_WARN("System can't access extended configuration space, please check!!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 817116e53d44..95a05b03f799 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -119,9 +119,11 @@ * - 3.57.0 - Compute tunneling on GFX10+ * - 3.58.0 - Add GFX12 DCC support * - 3.59.0 - Cleared VRAM + * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement) + * - 3.61.0 - Contains fix for RV/PCO compute queues */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 59 +#define KMS_DRIVER_MINOR 61 #define KMS_DRIVER_PATCHLEVEL 0 /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 784b03abb3a4..c1f35ded684e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1638,22 +1638,19 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, } mutex_lock(&adev->enforce_isolation_mutex); - for (i = 0; i < num_partitions; i++) { - if (adev->enforce_isolation[i] && !partition_values[i]) { + if (adev->enforce_isolation[i] && !partition_values[i]) /* Going from enabled to disabled */ amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); - amdgpu_mes_set_enforce_isolation(adev, i, false); - } else if (!adev->enforce_isolation[i] && partition_values[i]) { + else if (!adev->enforce_isolation[i] && partition_values[i]) /* Going from disabled to enabled */ amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); - amdgpu_mes_set_enforce_isolation(adev, i, true); - } adev->enforce_isolation[i] = partition_values[i]; } - mutex_unlock(&adev->enforce_isolation_mutex); + amdgpu_mes_update_enforce_isolation(adev); + return count; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 32b27a1658e7..709c11cbeabd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1681,7 +1681,8 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) } /* Fix me -- node_id is used to identify the correct MES instances in the future */ -int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable) +static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, + uint32_t node_id, bool enable) { struct mes_misc_op_input op_input = {0}; int r; @@ -1703,6 +1704,23 @@ error: return r; } +int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) +{ + int i, r = 0; + + if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { + mutex_lock(&adev->enforce_isolation_mutex); + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + if (adev->enforce_isolation[i]) + r |= amdgpu_mes_set_enforce_isolation(adev, i, true); + else + r |= amdgpu_mes_set_enforce_isolation(adev, i, false); + } + mutex_unlock(&adev->enforce_isolation_mutex); + } + return r; +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2df2444ee892..e98ea7ede1ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -534,6 +534,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); -int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable); +int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev); #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index babe94ade247..e5fc80ed06ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -3815,9 +3815,10 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name) if (err == -ENODEV) { dev_warn(adev->dev, "cap microcode does not exist, skip\n"); err = 0; - goto out; + } else { + dev_err(adev->dev, "fail to initialize cap microcode\n"); } - dev_err(adev->dev, "fail to initialize cap microcode\n"); + goto out; } info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ff286940ab43..262bd010a283 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -309,7 +309,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); while (src_mm.remaining) { uint64_t from, to, cur_size, tiling_flags; - uint32_t num_type, data_format, max_com; + uint32_t num_type, data_format, max_com, write_compress_disable; struct dma_fence *next; /* Never copy more than 256MiB at once to avoid a timeout */ @@ -340,9 +340,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK); num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE); data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT); + write_compress_disable = + AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE); copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) | AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) | - AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format)); + AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) | + AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE, + write_compress_disable)); } r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, @@ -2277,7 +2281,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor cursor; u64 addr; - int r; + int r = 0; if (!adev->mman.buffer_funcs_enabled) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 461fb8090ae0..208b7d1d8a27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -119,6 +119,8 @@ struct amdgpu_copy_mem { #define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07 #define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8 #define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1 #define AMDGPU_COPY_FLAGS_SET(field, value) \ (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fa572b40989e..0dce4421418c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -7437,6 +7437,38 @@ static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } +static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ip_block *gfx_block = + amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + + amdgpu_gfx_enforce_isolation_ring_begin_use(ring); + + /* Raven and PCO APUs seem to have stability issues + * with compute and gfxoff and gfx pg. Disable gfx pg during + * submission and allow again afterwards. + */ + if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) + gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE); +} + +static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ip_block *gfx_block = + amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + + /* Raven and PCO APUs seem to have stability issues + * with compute and gfxoff and gfx pg. Disable gfx pg during + * submission and allow again afterwards. + */ + if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) + gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE); + + amdgpu_gfx_enforce_isolation_ring_end_use(ring); +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -7613,8 +7645,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_wave_limit = gfx_v9_0_emit_wave_limit, .reset = gfx_v9_0_reset_kcq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, - .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, - .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, + .begin_use = gfx_v9_0_ring_begin_use_compute, + .end_use = gfx_v9_0_ring_end_use_compute, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 65f389eb65e5..f9a4d08eef92 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1633,6 +1633,10 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) goto failure; } + r = amdgpu_mes_update_enforce_isolation(adev); + if (r) + goto failure; + out: /* * Disable KIQ ring usage from the driver once MES is enabled. diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 901e924e69ad..0fd0fa6ed518 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -1743,6 +1743,10 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) goto failure; } + r = amdgpu_mes_update_enforce_isolation(adev); + if (r) + goto failure; + out: /* * Disable KIQ ring usage from the driver once MES is enabled. diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 9c17df2cf37b..7e10e94624e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1741,11 +1741,12 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, uint32_t byte_count, uint32_t copy_flags) { - uint32_t num_type, data_format, max_com; + uint32_t num_type, data_format, max_com, write_cm; max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED); data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT); num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE); + write_cm = AMDGPU_COPY_FLAGS_GET(copy_flags, WRITE_COMPRESS_DISABLE) ? 2 : 1; ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | @@ -1762,7 +1763,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED))) ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) | ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | - ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | + ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(write_cm) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); else ib->ptr[ib->length_dw++] = 0; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 984f0e705078..651660958e5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -4121,7 +4121,8 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0x0000ffff, 0x8bfe7e7e, 0x8bea6a6a, 0xb97af804, 0xbe804ec2, 0xbf94fffe, - 0xbe804a6c, 0xbfb10000, + 0xbe804a6c, 0xbe804ec2, + 0xbf94fffe, 0xbfb10000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm index 1740e98c6719..7b9d36e5fa43 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm @@ -1049,6 +1049,10 @@ L_SKIP_BARRIER_RESTORE: s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution L_END_PGM: + // Make sure that no wave of the workgroup can exit the trap handler + // before the workgroup barrier state is saved. + s_barrier_signal -2 + s_barrier_wait -2 s_endpgm_saved end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 2eff37aaf827..1695dd78ede8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -107,6 +107,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -167,10 +169,10 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c index 68dbc0399c87..3c0ae28c5923 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c @@ -154,6 +154,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -221,10 +223,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c index 2b72d5b4949b..565858b9044d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v12.c @@ -121,6 +121,8 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x55 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -184,10 +186,9 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1; - m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index ff417d5361c4..3014925d95ff 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -183,6 +183,9 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK; + m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; m->cp_mqd_base_addr_lo = lower_32_bits(addr); @@ -245,7 +248,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m = get_mqd(mqd); - m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control &= ~CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK; m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index bcddd989c7f3..bd36a75309e1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -300,7 +300,7 @@ static int init_user_queue(struct process_queue_manager *pqm, return 0; free_gang_ctx_bo: - amdgpu_amdkfd_free_gtt_mem(dev->adev, (*q)->gang_ctx_bo); + amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo); cleanup: uninit_queue(*q); *q = NULL; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ac3fd81fecef..9d9645a2d18e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1618,75 +1618,130 @@ static bool dm_should_disable_stutter(struct pci_dev *pdev) return false; } -static const struct dmi_system_id hpd_disconnect_quirk_table[] = { +struct amdgpu_dm_quirks { + bool aux_hpd_discon; + bool support_edp0_on_dp1; +}; + +static struct amdgpu_dm_quirks quirk_entries = { + .aux_hpd_discon = false, + .support_edp0_on_dp1 = false +}; + +static int edp0_on_dp1_callback(const struct dmi_system_id *id) +{ + quirk_entries.support_edp0_on_dp1 = true; + return 0; +} + +static int aux_hpd_discon_callback(const struct dmi_system_id *id) +{ + quirk_entries.aux_hpd_discon = true; + return 0; +} + +static const struct dmi_system_id dmi_quirk_table[] = { { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"), }, }, { + .callback = aux_hpd_discon_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"), }, }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite mt645 G8 Mobile Thin Client"), + }, + }, + { + .callback = edp0_on_dp1_callback, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP EliteBook 665 16 inch G11 Notebook PC"), + }, + }, {} /* TODO: refactor this from a fixed table to a dynamic option */ }; -static void retrieve_dmi_info(struct amdgpu_display_manager *dm) +static void retrieve_dmi_info(struct amdgpu_display_manager *dm, struct dc_init_data *init_data) { - const struct dmi_system_id *dmi_id; + int dmi_id; + struct drm_device *dev = dm->ddev; dm->aux_hpd_discon_quirk = false; + init_data->flags.support_edp0_on_dp1 = false; + + dmi_id = dmi_check_system(dmi_quirk_table); + + if (!dmi_id) + return; - dmi_id = dmi_first_match(hpd_disconnect_quirk_table); - if (dmi_id) { + if (quirk_entries.aux_hpd_discon) { dm->aux_hpd_discon_quirk = true; - DRM_INFO("aux_hpd_discon_quirk attached\n"); + drm_info(dev, "aux_hpd_discon_quirk attached\n"); + } + if (quirk_entries.support_edp0_on_dp1) { + init_data->flags.support_edp0_on_dp1 = true; + drm_info(dev, "aux_hpd_discon_quirk attached\n"); } } @@ -1994,7 +2049,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0)) init_data.num_virtual_links = 1; - retrieve_dmi_info(&adev->dm); + retrieve_dmi_info(&adev->dm, &init_data); if (adev->dm.bb_from_dmub) init_data.bb_from_dmub = adev->dm.bb_from_dmub; @@ -7240,8 +7295,14 @@ static void amdgpu_dm_connector_funcs_force(struct drm_connector *connector) struct dc_link *dc_link = aconnector->dc_link; struct dc_sink *dc_em_sink = aconnector->dc_em_sink; const struct drm_edid *drm_edid; + struct i2c_adapter *ddc; + + if (dc_link && dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; - drm_edid = drm_edid_read(connector); + drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); @@ -7286,14 +7347,21 @@ static int get_modes(struct drm_connector *connector) static void create_eml_sink(struct amdgpu_dm_connector *aconnector) { struct drm_connector *connector = &aconnector->base; + struct dc_link *dc_link = aconnector->dc_link; struct dc_sink_init_data init_params = { .link = aconnector->dc_link, .sink_signal = SIGNAL_TYPE_VIRTUAL }; const struct drm_edid *drm_edid; const struct edid *edid; + struct i2c_adapter *ddc; + + if (dc_link && dc_link->aux_mode) + ddc = &aconnector->dm_dp_aux.aux.ddc; + else + ddc = &aconnector->i2c->base; - drm_edid = drm_edid_read(connector); + drm_edid = drm_edid_read_ddc(connector, ddc); drm_edid_connector_update(connector, drm_edid); if (!drm_edid) { DRM_ERROR("No EDID found on connector: %s.\n", connector->name); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c index 3390f0d8420a..c4a7fd453e5f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c @@ -894,6 +894,7 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int i; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -920,6 +921,12 @@ void amdgpu_dm_hpd_init(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); + + /* Update reference counts for HPDs */ + for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { + if (amdgpu_irq_get(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) + drm_err(dev, "DM_IRQ: Failed get HPD for source=%d)!\n", i); + } } /** @@ -935,6 +942,7 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) struct drm_device *dev = adev_to_drm(adev); struct drm_connector *connector; struct drm_connector_list_iter iter; + int i; drm_connector_list_iter_begin(dev, &iter); drm_for_each_connector_iter(connector, &iter) { @@ -960,4 +968,10 @@ void amdgpu_dm_hpd_fini(struct amdgpu_device *adev) } } drm_connector_list_iter_end(&iter); + + /* Update reference counts for HPDs */ + for (i = DC_IRQ_SOURCE_HPD1; i <= adev->mode_info.num_hpd; i++) { + if (amdgpu_irq_put(adev, &adev->hpd_irq, i - DC_IRQ_SOURCE_HPD1)) + drm_err(dev, "DM_IRQ: Failed put HPD for source=%d!\n", i); + } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 45858bf1523d..e140b7a04d72 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -54,7 +54,8 @@ static bool link_supports_psrsu(struct dc_link *link) if (amdgpu_dc_debug_mask & DC_DISABLE_PSR_SU) return false; - return dc_dmub_check_min_version(dc->ctx->dmub_srv->dmub); + /* Temporarily disable PSR-SU to avoid glitches */ + return false; } /* diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index cecaadf741ad..f84e795e35f5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2133,7 +2133,7 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c dc_enable_stereo(dc, context, dc_streams, context->stream_count); - if (context->stream_count > get_seamless_boot_stream_count(context) || + if (get_seamless_boot_stream_count(context) == 0 || context->stream_count == 0) { /* Must wait for no flips to be pending before doing optimize bw */ hwss_wait_for_no_pipes_pending(dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index 5bb8b78bf250..bf636b28e3e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -63,8 +63,7 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, bool should_use_dmub_lock(struct dc_link *link) { - if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 || - link->psr_settings.psr_version == DC_PSR_VERSION_1) + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) return true; if (link->replay_settings.replay_feature_enabled) diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 46f9c05de16e..e1d500633dfa 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -29,11 +29,15 @@ dml_ccflags := $(CC_FLAGS_FPU) dml_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) -ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) -frame_warn_flag := -Wframe-larger-than=3072 -else -frame_warn_flag := -Wframe-larger-than=2048 -endif + ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) + frame_warn_limit := 3072 + else + frame_warn_limit := 2048 + endif + + ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y) + frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit) + endif endif CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags) diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 91c4f3b4bd5f..21fd466dba26 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -28,15 +28,19 @@ dml2_ccflags := $(CC_FLAGS_FPU) dml2_rcflags := $(CC_FLAGS_NO_FPU) ifneq ($(CONFIG_FRAME_WARN),0) -ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) -ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) -frame_warn_flag := -Wframe-larger-than=4096 -else -frame_warn_flag := -Wframe-larger-than=3072 -endif -else -frame_warn_flag := -Wframe-larger-than=2048 -endif + ifeq ($(filter y,$(CONFIG_KASAN)$(CONFIG_KCSAN)),y) + ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_COMPILE_TEST),yy) + frame_warn_limit := 4096 + else + frame_warn_limit := 3072 + endif + else + frame_warn_limit := 2048 + endif + + ifeq ($(call test-lt, $(CONFIG_FRAME_WARN), $(frame_warn_limit)),y) + frame_warn_flag := -Wframe-larger-than=$(frame_warn_limit) + endif endif subdir-ccflags-y += -I$(FULL_AMD_DISPLAY_PATH)/dc/dml2 diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index b9c6b45f6872..0c8ec30ea672 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1017,7 +1017,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s if (disp_cfg_stream_location < 0) disp_cfg_stream_location = dml_dispcfg->num_streams++; - ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml21_timing_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, context->streams[stream_index], dml_ctx); adjust_dml21_hblank_timing_config_from_pipe_ctx(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].timing, &context->res_ctx.pipe_ctx[stream_index]); populate_dml21_output_config_from_stream_state(&dml_dispcfg->stream_descriptors[disp_cfg_stream_location].output, context->streams[stream_index], &context->res_ctx.pipe_ctx[stream_index]); @@ -1042,7 +1042,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_planes++; - ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml21_surface_config_from_plane_state(in_dc, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location].surface, context->stream_status[stream_index].plane_states[plane_index]); populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index b416320873e1..b8a34abaf519 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -786,7 +786,7 @@ static void populate_dml_output_cfg_from_stream_state(struct dml_output_cfg_st * case SIGNAL_TYPE_DISPLAY_PORT_MST: case SIGNAL_TYPE_DISPLAY_PORT: out->OutputEncoder[location] = dml_dp; - if (dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) + if (location < MAX_HPO_DP2_ENCODERS && dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location] != -1) out->OutputEncoder[dml2->v20.scratch.hpo_stream_to_link_encoder_mapping[location]] = dml_dp2p0; break; case SIGNAL_TYPE_EDP: @@ -1343,7 +1343,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (disp_cfg_stream_location < 0) disp_cfg_stream_location = dml_dispcfg->num_timings++; - ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_stream_location >= 0 && disp_cfg_stream_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_timing_cfg_from_stream_state(&dml_dispcfg->timing, disp_cfg_stream_location, context->streams[i]); populate_dml_output_cfg_from_stream_state(&dml_dispcfg->output, disp_cfg_stream_location, context->streams[i], current_pipe_context, dml2); @@ -1383,7 +1383,7 @@ void map_dc_state_into_dml_display_cfg(struct dml2_context *dml2, struct dc_stat if (disp_cfg_plane_location < 0) disp_cfg_plane_location = dml_dispcfg->num_surfaces++; - ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location <= __DML2_WRAPPER_MAX_STREAMS_PLANES__); + ASSERT(disp_cfg_plane_location >= 0 && disp_cfg_plane_location < __DML2_WRAPPER_MAX_STREAMS_PLANES__); populate_dml_surface_cfg_from_plane_state(dml2->v20.dml_core_ctx.project, &dml_dispcfg->surface, disp_cfg_plane_location, context->stream_status[i].plane_states[j]); populate_dml_plane_cfg_from_plane_state( diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c index fe741100c0f8..d347bb06577a 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn30/dcn30_hubbub.c @@ -129,7 +129,8 @@ bool hubbub3_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF); - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); return wm_pending; } diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c index 7fb5523f9722..b98505b240a7 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn31/dcn31_hubbub.c @@ -750,7 +750,8 @@ static bool hubbub31_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); return wm_pending; } diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c index 5264dc26cce1..32a6be543105 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn32/dcn32_hubbub.c @@ -786,7 +786,8 @@ static bool hubbub32_program_watermarks( REG_UPDATE(DCHUBBUB_ARB_DF_REQ_OUTSTAND, DCHUBBUB_ARB_MIN_REQ_OUTSTAND, 0x1FF);*/ - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); diff --git a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c index 5eb3da8d5206..dce7269959ce 100644 --- a/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/hubbub/dcn35/dcn35_hubbub.c @@ -326,7 +326,8 @@ static bool hubbub35_program_watermarks( DCHUBBUB_ARB_MIN_REQ_OUTSTAND_COMMIT_THRESHOLD, 0xA);/*hw delta*/ REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DCHUBBUB_ARB_MAX_QOS_COMMIT_THRESHOLD, 0xF); - hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); + if (safe_to_lower || hubbub->ctx->dc->debug.disable_stutter) + hubbub1_allow_self_refresh_control(hubbub, !hubbub->ctx->dc->debug.disable_stutter); hubbub32_force_usr_retraining_allow(hubbub, hubbub->ctx->dc->debug.force_usr_allow); diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c index be0ac613675a..0da70b50e86d 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c @@ -500,6 +500,8 @@ void hubp3_init(struct hubp *hubp) //hubp[i].HUBPREQ_DEBUG.HUBPREQ_DEBUG[26] = 1; REG_WRITE(HUBPREQ_DEBUG, 1 << 26); + REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0); + hubp_reset(hubp); } diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c index edd37898d550..f3a21c623f44 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c @@ -168,6 +168,8 @@ void hubp32_init(struct hubp *hubp) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_WRITE(HUBPREQ_DEBUG_DB, 1 << 8); + + REG_UPDATE(DCHUBP_CNTL, HUBP_TTU_DISABLE, 0); } static struct hubp_funcs dcn32_hubp_funcs = { .hubp_enable_tripleBuffer = hubp2_enable_triplebuffer, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index 623cde76debf..b907ad1acedd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -236,7 +236,8 @@ void dcn35_init_hw(struct dc *dc) } hws->funcs.init_pipes(dc, dc->current_state); - if (dc->res_pool->hubbub->funcs->allow_self_refresh_control) + if (dc->res_pool->hubbub->funcs->allow_self_refresh_control && + !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter) dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub, !dc->res_pool->hubbub->ctx->dc->debug.disable_stutter); } diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 6a9e26905edf..7a22aef6e59c 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -78,7 +78,7 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, int ret = 0; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; enum ip_power_state pwr_state = gate ? POWER_STATE_OFF : POWER_STATE_ON; - bool is_vcn = (block_type == AMD_IP_BLOCK_TYPE_UVD || block_type == AMD_IP_BLOCK_TYPE_VCN); + bool is_vcn = block_type == AMD_IP_BLOCK_TYPE_VCN; if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state && (!is_vcn || adev->vcn.num_vcn_inst == 1)) { diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 67a8e22b1126..e237ea1185a7 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -3042,6 +3042,7 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block) if (!amdgpu_dpm) return 0; + mutex_lock(&adev->pm.mutex); kv_dpm_setup_asic(adev); ret = kv_dpm_enable(adev); if (ret) @@ -3049,6 +3050,8 @@ static int kv_dpm_hw_init(struct amdgpu_ip_block *ip_block) else adev->pm.dpm_enabled = true; amdgpu_legacy_dpm_compute_clocks(adev); + mutex_unlock(&adev->pm.mutex); + return ret; } @@ -3066,32 +3069,42 @@ static int kv_dpm_suspend(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + cancel_work_sync(&adev->pm.dpm.thermal.work); + if (adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm_enabled = false; /* disable dpm */ kv_dpm_disable(adev); /* reset the power state */ adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; + mutex_unlock(&adev->pm.mutex); } return 0; } static int kv_dpm_resume(struct amdgpu_ip_block *ip_block) { - int ret; + int ret = 0; struct amdgpu_device *adev = ip_block->adev; - if (adev->pm.dpm_enabled) { + if (!amdgpu_dpm) + return 0; + + if (!adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); /* asic init will reset to the boot state */ kv_dpm_setup_asic(adev); ret = kv_dpm_enable(adev); - if (ret) + if (ret) { adev->pm.dpm_enabled = false; - else + } else { adev->pm.dpm_enabled = true; - if (adev->pm.dpm_enabled) amdgpu_legacy_dpm_compute_clocks(adev); + } + mutex_unlock(&adev->pm.mutex); } - return 0; + return ret; } static bool kv_dpm_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c index e861355ebd75..c7518b13e787 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/legacy_dpm.c @@ -1009,9 +1009,12 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL; int temp, size = sizeof(temp); - if (!adev->pm.dpm_enabled) - return; + mutex_lock(&adev->pm.mutex); + if (!adev->pm.dpm_enabled) { + mutex_unlock(&adev->pm.mutex); + return; + } if (!pp_funcs->read_sensor(adev->powerplay.pp_handle, AMDGPU_PP_SENSOR_GPU_TEMP, (void *)&temp, @@ -1033,4 +1036,5 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work) adev->pm.dpm.state = dpm_state; amdgpu_legacy_dpm_compute_clocks(adev->powerplay.pp_handle); + mutex_unlock(&adev->pm.mutex); } diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index a87dcf0974bc..d6dfe2599ebe 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -7786,6 +7786,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block) if (!amdgpu_dpm) return 0; + mutex_lock(&adev->pm.mutex); si_dpm_setup_asic(adev); ret = si_dpm_enable(adev); if (ret) @@ -7793,6 +7794,7 @@ static int si_dpm_hw_init(struct amdgpu_ip_block *ip_block) else adev->pm.dpm_enabled = true; amdgpu_legacy_dpm_compute_clocks(adev); + mutex_unlock(&adev->pm.mutex); return ret; } @@ -7810,32 +7812,44 @@ static int si_dpm_suspend(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + cancel_work_sync(&adev->pm.dpm.thermal.work); + if (adev->pm.dpm_enabled) { + mutex_lock(&adev->pm.mutex); + adev->pm.dpm_enabled = false; /* disable dpm */ si_dpm_disable(adev); /* reset the power state */ adev->pm.dpm.current_ps = adev->pm.dpm.requested_ps = adev->pm.dpm.boot_ps; + mutex_unlock(&adev->pm.mutex); } + return 0; } static int si_dpm_resume(struct amdgpu_ip_block *ip_block) { - int ret; + int ret = 0; struct amdgpu_device *adev = ip_block->adev; - if (adev->pm.dpm_enabled) { + if (!amdgpu_dpm) + return 0; + + if (!adev->pm.dpm_enabled) { /* asic init will reset to the boot state */ + mutex_lock(&adev->pm.mutex); si_dpm_setup_asic(adev); ret = si_dpm_enable(adev); - if (ret) + if (ret) { adev->pm.dpm_enabled = false; - else + } else { adev->pm.dpm_enabled = true; - if (adev->pm.dpm_enabled) amdgpu_legacy_dpm_compute_clocks(adev); + } + mutex_unlock(&adev->pm.mutex); } - return 0; + + return ret; } static bool si_dpm_is_idle(void *handle) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8ca793c222ff..ed9dac00ebfb 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -612,7 +612,8 @@ static int smu_sys_set_pp_table(void *handle, return -EIO; } - if (!smu_table->hardcode_pptable) { + if (!smu_table->hardcode_pptable || smu_table->power_play_table_size < size) { + kfree(smu_table->hardcode_pptable); smu_table->hardcode_pptable = kzalloc(size, GFP_KERNEL); if (!smu_table->hardcode_pptable) return -ENOMEM; diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c index ebccb74306a7..f30b3d5eeca5 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_wb_connector.c @@ -160,6 +160,10 @@ static int komeda_wb_connector_add(struct komeda_kms_dev *kms, formats = komeda_get_layer_fourcc_list(&mdev->fmt_tbl, kwb_conn->wb_layer->layer_type, &n_formats); + if (!formats) { + kfree(kwb_conn); + return -ENOMEM; + } err = drm_writeback_connector_init(&kms->base, wb_conn, &komeda_wb_connector_funcs, diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 0e282b7b167c..b9eb67e3fa90 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -195,7 +195,7 @@ static bool __ast_dp_wait_enable(struct ast_device *ast, bool enabled) if (enabled) vgacrdf_test |= AST_IO_VGACRDF_DP_VIDEO_ENABLE; - for (i = 0; i < 200; ++i) { + for (i = 0; i < 1000; ++i) { if (i) mdelay(1); vgacrdf = ast_get_index_reg_mask(ast, AST_IO_VGACRI, 0xdf, diff --git a/drivers/gpu/drm/display/drm_dp_cec.c b/drivers/gpu/drm/display/drm_dp_cec.c index 007ceb281d00..56a4965e518c 100644 --- a/drivers/gpu/drm/display/drm_dp_cec.c +++ b/drivers/gpu/drm/display/drm_dp_cec.c @@ -311,16 +311,6 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address) if (!aux->transfer) return; -#ifndef CONFIG_MEDIA_CEC_RC - /* - * CEC_CAP_RC is part of CEC_CAP_DEFAULTS, but it is stripped by - * cec_allocate_adapter() if CONFIG_MEDIA_CEC_RC is undefined. - * - * Do this here as well to ensure the tests against cec_caps are - * correct. - */ - cec_caps &= ~CEC_CAP_RC; -#endif cancel_delayed_work_sync(&aux->cec.unregister_work); mutex_lock(&aux->cec.lock); @@ -337,7 +327,9 @@ void drm_dp_cec_attach(struct drm_dp_aux *aux, u16 source_physical_address) num_las = CEC_MAX_LOG_ADDRS; if (aux->cec.adap) { - if (aux->cec.adap->capabilities == cec_caps && + /* Check if the adapter properties have changed */ + if ((aux->cec.adap->capabilities & CEC_CAP_MONITOR_ALL) == + (cec_caps & CEC_CAP_MONITOR_ALL) && aux->cec.adap->available_log_addrs == num_las) { /* Unchanged, so just set the phys addr */ cec_s_phys_addr(aux->cec.adap, source_physical_address, false); diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index da3c8521a7fa..61c7c2c588c6 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2544,7 +2544,7 @@ u8 drm_dp_dsc_sink_bpp_incr(const u8 dsc_dpcd[DP_DSC_RECEIVER_CAP_SIZE]) { u8 bpp_increment_dpcd = dsc_dpcd[DP_DSC_BITS_PER_PIXEL_INC - DP_DSC_SUPPORT]; - switch (bpp_increment_dpcd) { + switch (bpp_increment_dpcd & DP_DSC_BITS_PER_PIXEL_MASK) { case DP_DSC_BITS_PER_PIXEL_1_16: return 16; case DP_DSC_BITS_PER_PIXEL_1_8: diff --git a/drivers/gpu/drm/drm_fbdev_dma.c b/drivers/gpu/drm/drm_fbdev_dma.c index b14b581c059d..02a516e77192 100644 --- a/drivers/gpu/drm/drm_fbdev_dma.c +++ b/drivers/gpu/drm/drm_fbdev_dma.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: MIT #include <linux/fb.h> +#include <linux/vmalloc.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> @@ -70,37 +71,102 @@ static const struct fb_ops drm_fbdev_dma_fb_ops = { .fb_destroy = drm_fbdev_dma_fb_destroy, }; -FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma, +FB_GEN_DEFAULT_DEFERRED_DMAMEM_OPS(drm_fbdev_dma_shadowed, drm_fb_helper_damage_range, drm_fb_helper_damage_area); -static int drm_fbdev_dma_deferred_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) +static void drm_fbdev_dma_shadowed_fb_destroy(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; - struct drm_framebuffer *fb = fb_helper->fb; - struct drm_gem_dma_object *dma = drm_fb_dma_get_gem_obj(fb, 0); + void *shadow = info->screen_buffer; + + if (!fb_helper->dev) + return; - if (!dma->map_noncoherent) - vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + if (info->fbdefio) + fb_deferred_io_cleanup(info); + drm_fb_helper_fini(fb_helper); + vfree(shadow); - return fb_deferred_io_mmap(info, vma); + drm_client_buffer_vunmap(fb_helper->buffer); + drm_client_framebuffer_delete(fb_helper->buffer); + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); } -static const struct fb_ops drm_fbdev_dma_deferred_fb_ops = { +static const struct fb_ops drm_fbdev_dma_shadowed_fb_ops = { .owner = THIS_MODULE, .fb_open = drm_fbdev_dma_fb_open, .fb_release = drm_fbdev_dma_fb_release, - __FB_DEFAULT_DEFERRED_OPS_RDWR(drm_fbdev_dma), + FB_DEFAULT_DEFERRED_OPS(drm_fbdev_dma_shadowed), DRM_FB_HELPER_DEFAULT_OPS, - __FB_DEFAULT_DEFERRED_OPS_DRAW(drm_fbdev_dma), - .fb_mmap = drm_fbdev_dma_deferred_fb_mmap, - .fb_destroy = drm_fbdev_dma_fb_destroy, + .fb_destroy = drm_fbdev_dma_shadowed_fb_destroy, }; /* * struct drm_fb_helper */ +static void drm_fbdev_dma_damage_blit_real(struct drm_fb_helper *fb_helper, + struct drm_clip_rect *clip, + struct iosys_map *dst) +{ + struct drm_framebuffer *fb = fb_helper->fb; + size_t offset = clip->y1 * fb->pitches[0]; + size_t len = clip->x2 - clip->x1; + unsigned int y; + void *src; + + switch (drm_format_info_bpp(fb->format, 0)) { + case 1: + offset += clip->x1 / 8; + len = DIV_ROUND_UP(len + clip->x1 % 8, 8); + break; + case 2: + offset += clip->x1 / 4; + len = DIV_ROUND_UP(len + clip->x1 % 4, 4); + break; + case 4: + offset += clip->x1 / 2; + len = DIV_ROUND_UP(len + clip->x1 % 2, 2); + break; + default: + offset += clip->x1 * fb->format->cpp[0]; + len *= fb->format->cpp[0]; + break; + } + + src = fb_helper->info->screen_buffer + offset; + iosys_map_incr(dst, offset); /* go to first pixel within clip rect */ + + for (y = clip->y1; y < clip->y2; y++) { + iosys_map_memcpy_to(dst, 0, src, len); + iosys_map_incr(dst, fb->pitches[0]); + src += fb->pitches[0]; + } +} + +static int drm_fbdev_dma_damage_blit(struct drm_fb_helper *fb_helper, + struct drm_clip_rect *clip) +{ + struct drm_client_buffer *buffer = fb_helper->buffer; + struct iosys_map dst; + + /* + * For fbdev emulation, we only have to protect against fbdev modeset + * operations. Nothing else will involve the client buffer's BO. So it + * is sufficient to acquire struct drm_fb_helper.lock here. + */ + mutex_lock(&fb_helper->lock); + + dst = buffer->map; + drm_fbdev_dma_damage_blit_real(fb_helper, clip, &dst); + + mutex_unlock(&fb_helper->lock); + + return 0; +} static int drm_fbdev_dma_helper_fb_dirty(struct drm_fb_helper *helper, struct drm_clip_rect *clip) { @@ -112,6 +178,10 @@ static int drm_fbdev_dma_helper_fb_dirty(struct drm_fb_helper *helper, return 0; if (helper->fb->funcs->dirty) { + ret = drm_fbdev_dma_damage_blit(helper, clip); + if (drm_WARN_ONCE(dev, ret, "Damage blitter failed: ret=%d\n", ret)) + return ret; + ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) return ret; @@ -128,14 +198,80 @@ static const struct drm_fb_helper_funcs drm_fbdev_dma_helper_funcs = { * struct drm_fb_helper */ +static int drm_fbdev_dma_driver_fbdev_probe_tail(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_device *dev = fb_helper->dev; + struct drm_client_buffer *buffer = fb_helper->buffer; + struct drm_gem_dma_object *dma_obj = to_drm_gem_dma_obj(buffer->gem); + struct drm_framebuffer *fb = fb_helper->fb; + struct fb_info *info = fb_helper->info; + struct iosys_map map = buffer->map; + + info->fbops = &drm_fbdev_dma_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB; /* system memory */ + if (dma_obj->map_noncoherent) + info->flags |= FBINFO_READS_FAST; /* signal caching */ + info->screen_size = sizes->surface_height * fb->pitches[0]; + info->screen_buffer = map.vaddr; + if (!(info->flags & FBINFO_HIDE_SMEM_START)) { + if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer))) + info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); + } + info->fix.smem_len = info->screen_size; + + return 0; +} + +static int drm_fbdev_dma_driver_fbdev_probe_tail_shadowed(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_client_buffer *buffer = fb_helper->buffer; + struct fb_info *info = fb_helper->info; + size_t screen_size = buffer->gem->size; + void *screen_buffer; + int ret; + + /* + * Deferred I/O requires struct page for framebuffer memory, + * which is not guaranteed for all DMA ranges. We thus create + * a shadow buffer in system memory. + */ + screen_buffer = vzalloc(screen_size); + if (!screen_buffer) + return -ENOMEM; + + info->fbops = &drm_fbdev_dma_shadowed_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB; /* system memory */ + info->flags |= FBINFO_READS_FAST; /* signal caching */ + info->screen_buffer = screen_buffer; + info->fix.smem_len = screen_size; + + fb_helper->fbdefio.delay = HZ / 20; + fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; + + info->fbdefio = &fb_helper->fbdefio; + ret = fb_deferred_io_init(info); + if (ret) + goto err_vfree; + + return 0; + +err_vfree: + vfree(screen_buffer); + return ret; +} + int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; - bool use_deferred_io = false; struct drm_client_buffer *buffer; - struct drm_gem_dma_object *dma_obj; struct drm_framebuffer *fb; struct fb_info *info; u32 format; @@ -152,19 +288,9 @@ int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, sizes->surface_height, format); if (IS_ERR(buffer)) return PTR_ERR(buffer); - dma_obj = to_drm_gem_dma_obj(buffer->gem); fb = buffer->fb; - /* - * Deferred I/O requires struct page for framebuffer memory, - * which is not guaranteed for all DMA ranges. We thus only - * install deferred I/O if we have a framebuffer that requires - * it. - */ - if (fb->funcs->dirty) - use_deferred_io = true; - ret = drm_client_buffer_vmap(buffer, &map); if (ret) { goto err_drm_client_buffer_delete; @@ -185,45 +311,12 @@ int drm_fbdev_dma_driver_fbdev_probe(struct drm_fb_helper *fb_helper, drm_fb_helper_fill_info(info, fb_helper, sizes); - if (use_deferred_io) - info->fbops = &drm_fbdev_dma_deferred_fb_ops; + if (fb->funcs->dirty) + ret = drm_fbdev_dma_driver_fbdev_probe_tail_shadowed(fb_helper, sizes); else - info->fbops = &drm_fbdev_dma_fb_ops; - - /* screen */ - info->flags |= FBINFO_VIRTFB; /* system memory */ - if (dma_obj->map_noncoherent) - info->flags |= FBINFO_READS_FAST; /* signal caching */ - info->screen_size = sizes->surface_height * fb->pitches[0]; - info->screen_buffer = map.vaddr; - if (!(info->flags & FBINFO_HIDE_SMEM_START)) { - if (!drm_WARN_ON(dev, is_vmalloc_addr(info->screen_buffer))) - info->fix.smem_start = page_to_phys(virt_to_page(info->screen_buffer)); - } - info->fix.smem_len = info->screen_size; - - /* - * Only set up deferred I/O if the screen buffer supports - * it. If this disagrees with the previous test for ->dirty, - * mmap on the /dev/fb file might not work correctly. - */ - if (!is_vmalloc_addr(info->screen_buffer) && info->fix.smem_start) { - unsigned long pfn = info->fix.smem_start >> PAGE_SHIFT; - - if (drm_WARN_ON(dev, !pfn_to_page(pfn))) - use_deferred_io = false; - } - - /* deferred I/O */ - if (use_deferred_io) { - fb_helper->fbdefio.delay = HZ / 20; - fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; - - info->fbdefio = &fb_helper->fbdefio; - ret = fb_deferred_io_init(info); - if (ret) - goto err_drm_fb_helper_release_info; - } + ret = drm_fbdev_dma_driver_fbdev_probe_tail(fb_helper, sizes); + if (ret) + goto err_drm_fb_helper_release_info; return 0; diff --git a/drivers/gpu/drm/hisilicon/hibmc/Kconfig b/drivers/gpu/drm/hisilicon/hibmc/Kconfig index 93b8d32e3be1..98d77d74999d 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Kconfig +++ b/drivers/gpu/drm/hisilicon/hibmc/Kconfig @@ -4,6 +4,8 @@ config DRM_HISI_HIBMC depends on DRM && PCI depends on MMU select DRM_CLIENT_SELECTION + select DRM_DISPLAY_HELPER + select DRM_DISPLAY_DP_HELPER select DRM_KMS_HELPER select DRM_VRAM_HELPER select DRM_TTM diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c index c977b74f82f0..82bf6c654de2 100644 --- a/drivers/gpu/drm/i915/display/icl_dsi.c +++ b/drivers/gpu/drm/i915/display/icl_dsi.c @@ -809,8 +809,8 @@ gen11_dsi_configure_transcoder(struct intel_encoder *encoder, /* select data lane width */ tmp = intel_de_read(display, TRANS_DDI_FUNC_CTL(display, dsi_trans)); - tmp &= ~DDI_PORT_WIDTH_MASK; - tmp |= DDI_PORT_WIDTH(intel_dsi->lane_count); + tmp &= ~TRANS_DDI_PORT_WIDTH_MASK; + tmp |= TRANS_DDI_PORT_WIDTH(intel_dsi->lane_count); /* select input pipe */ tmp &= ~TRANS_DDI_EDP_INPUT_MASK; diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index fc1e517e074a..7e6ce905bdaf 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -41,8 +41,9 @@ static u32 scale(u32 source_val, { u64 target_val; - WARN_ON(source_min > source_max); - WARN_ON(target_min > target_max); + if (WARN_ON(source_min >= source_max) || + WARN_ON(target_min > target_max)) + return target_min; /* defensive */ source_val = clamp(source_val, source_min, source_max); diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c index acb986bc1f33..ff2cf3daa7a2 100644 --- a/drivers/gpu/drm/i915/display/intel_ddi.c +++ b/drivers/gpu/drm/i915/display/intel_ddi.c @@ -658,7 +658,6 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - bool is_mst = intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST); u32 ctl; if (DISPLAY_VER(dev_priv) >= 11) @@ -678,8 +677,7 @@ void intel_ddi_disable_transcoder_func(const struct intel_crtc_state *crtc_state TRANS_DDI_PORT_SYNC_MASTER_SELECT_MASK); if (DISPLAY_VER(dev_priv) >= 12) { - if (!intel_dp_mst_is_master_trans(crtc_state) || - (!is_mst && intel_dp_is_uhbr(crtc_state))) { + if (!intel_dp_mst_is_master_trans(crtc_state)) { ctl &= ~(TGL_TRANS_DDI_PORT_MASK | TRANS_DDI_MODE_SELECT_MASK); } @@ -868,7 +866,7 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, encoder->base.base.id, encoder->base.name); if (!mst_pipe_mask && dp128b132b_pipe_mask) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); /* * If we don't have 8b/10b MST, but have more than one @@ -880,7 +878,8 @@ static void intel_ddi_get_encoder_pipes(struct intel_encoder *encoder, * we don't expect MST to have been enabled at that point, and * can assume it's SST. */ - if (hweight8(dp128b132b_pipe_mask) > 1 || intel_dp->is_mst) + if (hweight8(dp128b132b_pipe_mask) > 1 || + intel_dp_mst_encoder_active_links(dig_port)) mst_pipe_mask = dp128b132b_pipe_mask; } @@ -3134,7 +3133,7 @@ static void intel_ddi_post_disable_dp(struct intel_atomic_state *state, intel_dp_set_power(intel_dp, DP_SET_POWER_D3); if (DISPLAY_VER(dev_priv) >= 12) { - if (is_mst) { + if (is_mst || intel_dp_is_uhbr(old_crtc_state)) { enum transcoder cpu_transcoder = old_crtc_state->cpu_transcoder; intel_de_rmw(dev_priv, @@ -3487,7 +3486,7 @@ static void intel_ddi_enable_hdmi(struct intel_atomic_state *state, intel_de_rmw(dev_priv, XELPDP_PORT_BUF_CTL1(dev_priv, port), XELPDP_PORT_WIDTH_MASK | XELPDP_PORT_REVERSAL, port_buf); - buf_ctl |= DDI_PORT_WIDTH(lane_count); + buf_ctl |= DDI_PORT_WIDTH(crtc_state->lane_count); if (DISPLAY_VER(dev_priv) >= 20) buf_ctl |= XE2LPD_DDI_BUF_D2D_LINK_ENABLE; @@ -4153,13 +4152,13 @@ static void intel_ddi_read_func_ctl(struct intel_encoder *encoder, } else if (ddi_mode == TRANS_DDI_MODE_SELECT_DP_MST) { intel_ddi_read_func_ctl_dp_mst(encoder, pipe_config, ddi_func_ctl); } else if (ddi_mode == TRANS_DDI_MODE_SELECT_FDI_OR_128B132B && HAS_DP20(display)) { - struct intel_dp *intel_dp = enc_to_intel_dp(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); /* * If this is true, we know we're being called from mst stream * encoder's ->get_config(). */ - if (intel_dp->is_mst) + if (intel_dp_mst_encoder_active_links(dig_port)) intel_ddi_read_func_ctl_dp_mst(encoder, pipe_config, ddi_func_ctl); else intel_ddi_read_func_ctl_dp_sst(encoder, pipe_config, ddi_func_ctl); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 4271da219b41..41128469f12a 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -6628,12 +6628,30 @@ static int intel_async_flip_check_hw(struct intel_atomic_state *state, struct in static int intel_joiner_add_affected_crtcs(struct intel_atomic_state *state) { struct drm_i915_private *i915 = to_i915(state->base.dev); + const struct intel_plane_state *plane_state; struct intel_crtc_state *crtc_state; + struct intel_plane *plane; struct intel_crtc *crtc; u8 affected_pipes = 0; u8 modeset_pipes = 0; int i; + /* + * Any plane which is in use by the joiner needs its crtc. + * Pull those in first as this will not have happened yet + * if the plane remains disabled according to uapi. + */ + for_each_new_intel_plane_in_state(state, plane, plane_state, i) { + crtc = to_intel_crtc(plane_state->hw.crtc); + if (!crtc) + continue; + + crtc_state = intel_atomic_get_crtc_state(&state->base, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + } + + /* Now pull in all joined crtcs */ for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { affected_pipes |= crtc_state->joiner_pipes; if (intel_crtc_needs_modeset(crtc_state)) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index f1f3b1bb1e89..aa77ddcee42c 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1791,7 +1791,7 @@ int intel_dp_dsc_max_src_input_bpc(struct intel_display *display) if (DISPLAY_VER(display) == 11) return 10; - return 0; + return intel_dp_dsc_min_src_input_bpc(); } int intel_dp_dsc_compute_max_bpp(const struct intel_connector *connector, @@ -2072,11 +2072,10 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp, /* Compressed BPP should be less than the Input DSC bpp */ dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1); - for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) { - if (valid_dsc_bpp[i] < dsc_min_bpp) + for (i = ARRAY_SIZE(valid_dsc_bpp) - 1; i >= 0; i--) { + if (valid_dsc_bpp[i] < dsc_min_bpp || + valid_dsc_bpp[i] > dsc_max_bpp) continue; - if (valid_dsc_bpp[i] > dsc_max_bpp) - break; ret = dsc_compute_link_config(intel_dp, pipe_config, @@ -2829,7 +2828,6 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, crtc_state->infoframes.enable |= intel_hdmi_infoframe_enable(DP_SDP_ADAPTIVE_SYNC); - /* Currently only DP_AS_SDP_AVT_FIXED_VTOTAL mode supported */ as_sdp->sdp_type = DP_SDP_ADAPTIVE_SYNC; as_sdp->length = 0x9; as_sdp->duration_incr_ms = 0; @@ -2840,7 +2838,7 @@ static void intel_dp_compute_as_sdp(struct intel_dp *intel_dp, as_sdp->target_rr = drm_mode_vrefresh(adjusted_mode); as_sdp->target_rr_divider = true; } else { - as_sdp->mode = DP_AS_SDP_AVT_FIXED_VTOTAL; + as_sdp->mode = DP_AS_SDP_AVT_DYNAMIC_VTOTAL; as_sdp->vtotal = adjusted_mode->vtotal; as_sdp->target_rr = 0; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_link_training.c b/drivers/gpu/drm/i915/display/intel_dp_link_training.c index 8b1977cfec50..6696a32cdd3e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/display/intel_dp_link_training.c @@ -1563,7 +1563,7 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp, if (wait_for(intel_dp_128b132b_intra_hop(intel_dp, crtc_state) == 0, 500)) { lt_err(intel_dp, DP_PHY_DPRX, "128b/132b intra-hop not clear\n"); - return false; + goto out; } if (intel_dp_128b132b_lane_eq(intel_dp, crtc_state) && @@ -1575,6 +1575,19 @@ intel_dp_128b132b_link_train(struct intel_dp *intel_dp, passed ? "passed" : "failed", crtc_state->port_clock, crtc_state->lane_count); +out: + /* + * Ensure that the training pattern does get set to TPS2 even in case + * of a failure, as is the case at the end of a passing link training + * and what is expected by the transcoder. Leaving TPS1 set (and + * disabling the link train mode in DP_TP_CTL later from TPS1 directly) + * would result in a stuck transcoder HW state and flip-done timeouts + * later in the modeset sequence. + */ + if (!passed) + intel_dp_program_link_training_pattern(intel_dp, crtc_state, + DP_PHY_DPRX, DP_TRAINING_PATTERN_2); + return passed; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 0c44fc7dd86c..a65cf97ad12d 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -341,6 +341,10 @@ int intel_dp_mtp_tu_compute_config(struct intel_dp *intel_dp, break; } + + /* Allow using zero step to indicate one try */ + if (!step) + break; } if (slots < 0) { diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c index 7464b44c8bb3..1bab7c34a794 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_hdcp.c @@ -41,7 +41,7 @@ intel_hdcp_adjust_hdcp_line_rekeying(struct intel_encoder *encoder, u32 rekey_bit = 0; /* Here we assume HDMI is in TMDS mode of operation */ - if (encoder->type != INTEL_OUTPUT_HDMI) + if (!intel_encoder_is_hdmi(encoder)) return; if (DISPLAY_VER(display) >= 30) { @@ -2188,6 +2188,19 @@ static int intel_hdcp2_check_link(struct intel_connector *connector) drm_dbg_kms(display->drm, "HDCP2.2 Downstream topology change\n"); + + ret = hdcp2_authenticate_repeater_topology(connector); + if (!ret) { + intel_hdcp_update_value(connector, + DRM_MODE_CONTENT_PROTECTION_ENABLED, + true); + goto out; + } + + drm_dbg_kms(display->drm, + "[CONNECTOR:%d:%s] Repeater topology auth failed.(%d)\n", + connector->base.base.id, connector->base.name, + ret); } else { drm_dbg_kms(display->drm, "[CONNECTOR:%d:%s] HDCP2.2 link failed, retrying auth\n", diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index ff9764cac1e7..80e558042d97 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -106,8 +106,6 @@ static const u32 icl_sdr_y_plane_formats[] = { DRM_FORMAT_Y216, DRM_FORMAT_XYUV8888, DRM_FORMAT_XVYU2101010, - DRM_FORMAT_XVYU12_16161616, - DRM_FORMAT_XVYU16161616, }; static const u32 icl_sdr_uv_plane_formats[] = { @@ -134,8 +132,6 @@ static const u32 icl_sdr_uv_plane_formats[] = { DRM_FORMAT_Y216, DRM_FORMAT_XYUV8888, DRM_FORMAT_XVYU2101010, - DRM_FORMAT_XVYU12_16161616, - DRM_FORMAT_XVYU16161616, }; static const u32 icl_hdr_plane_formats[] = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index fe69f2c8527d..ae3343c81a64 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -209,8 +209,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) struct address_space *mapping = obj->base.filp->f_mapping; unsigned int max_segment = i915_sg_segment_size(i915->drm.dev); struct sg_table *st; - struct sgt_iter sgt_iter; - struct page *page; int ret; /* @@ -239,9 +237,7 @@ rebuild_st: * for PAGE_SIZE chunks instead may be helpful. */ if (max_segment > PAGE_SIZE) { - for_each_sgt_page(page, sgt_iter, st) - put_page(page); - sg_free_table(st); + shmem_sg_free_table(st, mapping, false, false); kfree(st); max_segment = PAGE_SIZE; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 12f1ba7ca9c1..3fce5c000144 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1469,6 +1469,19 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) spin_unlock_irqrestore(&guc->timestamp.lock, flags); } +static void __update_guc_busyness_running_state(struct intel_guc *guc) +{ + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long flags; + + spin_lock_irqsave(&guc->timestamp.lock, flags); + for_each_engine(engine, gt, id) + engine->stats.guc.running = false; + spin_unlock_irqrestore(&guc->timestamp.lock, flags); +} + static void __update_guc_busyness_stats(struct intel_guc *guc) { struct intel_gt *gt = guc_to_gt(guc); @@ -1619,6 +1632,9 @@ void intel_guc_busyness_park(struct intel_gt *gt) if (!guc_submission_initialized(guc)) return; + /* Assume no engines are running and set running state to false */ + __update_guc_busyness_running_state(guc); + /* * There is a race with suspend flow where the worker runs after suspend * and causes an unclaimed register access warning. Cancel the worker @@ -3433,10 +3449,10 @@ static inline int guc_lrc_desc_unpin(struct intel_context *ce) */ ret = deregister_context(ce, ce->guc_id.id); if (ret) { - spin_lock(&ce->guc_state.lock); + spin_lock_irqsave(&ce->guc_state.lock, flags); set_context_registered(ce); clr_context_destroyed(ce); - spin_unlock(&ce->guc_state.lock); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); /* * As gt-pm is awake at function entry, intel_wakeref_put_async merely decrements * the wakeref immediately but per function spec usage call this after unlock. @@ -5519,12 +5535,20 @@ static inline void guc_log_context(struct drm_printer *p, { drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); - drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", - ce->ring->head, - ce->lrc_reg_state[CTX_RING_HEAD]); - drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", - ce->ring->tail, - ce->lrc_reg_state[CTX_RING_TAIL]); + if (intel_context_pin_if_active(ce)) { + drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", + ce->ring->head, + ce->lrc_reg_state[CTX_RING_HEAD]); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory %u\n", + ce->ring->tail, + ce->lrc_reg_state[CTX_RING_TAIL]); + intel_context_unpin(ce); + } else { + drm_printf(p, "\t\tLRC Head: Internal %u, Memory not pinned\n", + ce->ring->head); + drm_printf(p, "\t\tLRC Tail: Internal %u, Memory not pinned\n", + ce->ring->tail); + } drm_printf(p, "\t\tContext Pin Count: %u\n", atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 765e6c0528fb..786c727aea45 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3633,7 +3633,7 @@ enum skl_power_gate { #define DDI_BUF_IS_IDLE (1 << 7) #define DDI_BUF_CTL_TC_PHY_OWNERSHIP REG_BIT(6) #define DDI_A_4_LANES (1 << 4) -#define DDI_PORT_WIDTH(width) (((width) - 1) << 1) +#define DDI_PORT_WIDTH(width) (((width) == 3 ? 4 : ((width) - 1)) << 1) #define DDI_PORT_WIDTH_MASK (7 << 1) #define DDI_PORT_WIDTH_SHIFT 1 #define DDI_INIT_DISPLAY_DETECTED (1 << 0) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 5c397a2df70e..5d27e1c733c5 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -168,7 +168,7 @@ static int igt_ppgtt_alloc(void *arg) return PTR_ERR(ppgtt); if (!ppgtt->vm.allocate_va_range) - goto err_ppgtt_cleanup; + goto ppgtt_vm_put; /* * While we only allocate the page tables here and so we could @@ -236,7 +236,7 @@ err_ppgtt_cleanup: goto retry; } i915_gem_ww_ctx_fini(&ww); - +ppgtt_vm_put: i915_vm_put(&ppgtt->vm); return err; } diff --git a/drivers/gpu/drm/imagination/Makefile b/drivers/gpu/drm/imagination/Makefile index 9bc6a3884c22..3d9d4d40fb80 100644 --- a/drivers/gpu/drm/imagination/Makefile +++ b/drivers/gpu/drm/imagination/Makefile @@ -1,8 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only OR MIT # Copyright (c) 2023 Imagination Technologies Ltd. -subdir-ccflags-y := -I$(src) - powervr-y := \ pvr_ccb.o \ pvr_cccb.o \ diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 65d38b25c070..699b0dd34b18 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -813,10 +813,10 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu) } ver = gmu_read(gmu, REG_A6XX_GMU_CORE_FW_VERSION); - DRM_INFO("Loaded GMU firmware v%u.%u.%u\n", - FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MAJOR__MASK, ver), - FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MINOR__MASK, ver), - FIELD_GET(A6XX_GMU_CORE_FW_VERSION_STEP__MASK, ver)); + DRM_INFO_ONCE("Loaded GMU firmware v%u.%u.%u\n", + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MAJOR__MASK, ver), + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_MINOR__MASK, ver), + FIELD_GET(A6XX_GMU_CORE_FW_VERSION_STEP__MASK, ver)); return 0; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h index 421afacb7248..36cc9dbc00b5 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_0_sm8150.h @@ -297,7 +297,7 @@ static const struct dpu_wb_cfg sm8150_wb[] = { { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, - .features = WB_SDM845_MASK, + .features = WB_SM8250_MASK, .format_list = wb2_formats_rgb, .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h index 641023b102bf..e8eacdb47967 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_1_sc8180x.h @@ -304,7 +304,7 @@ static const struct dpu_wb_cfg sc8180x_wb[] = { { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, - .features = WB_SDM845_MASK, + .features = WB_SM8250_MASK, .format_list = wb2_formats_rgb, .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h index 621a2140f675..d761ed705bac 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_3_sm6150.h @@ -116,14 +116,12 @@ static const struct dpu_lm_cfg sm6150_lm[] = { .sblk = &sdm845_lm_sblk, .pingpong = PINGPONG_0, .dspp = DSPP_0, - .lm_pair = LM_1, }, { .name = "lm_1", .id = LM_1, .base = 0x45000, .len = 0x320, .features = MIXER_QCM2290_MASK, .sblk = &sdm845_lm_sblk, .pingpong = PINGPONG_1, - .lm_pair = LM_0, }, { .name = "lm_2", .id = LM_2, .base = 0x46000, .len = 0x320, diff --git a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h index d039b96beb97..76f60a2df7a8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h +++ b/drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_5_4_sm6125.h @@ -144,7 +144,7 @@ static const struct dpu_wb_cfg sm6125_wb[] = { { .name = "wb_2", .id = WB_2, .base = 0x65000, .len = 0x2c8, - .features = WB_SDM845_MASK, + .features = WB_SM8250_MASK, .format_list = wb2_formats_rgb, .num_formats = ARRAY_SIZE(wb2_formats_rgb), .clk_ctrl = DPU_CLK_CTRL_WB2, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index 7191b1a6d41b..e5dcd41a361f 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -1228,8 +1228,6 @@ static int dpu_crtc_reassign_planes(struct drm_crtc *crtc, struct drm_crtc_state done: kfree(states); return ret; - - return 0; } static int dpu_crtc_atomic_check(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 5172ab4dea99..48e6e8d74c85 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -2281,6 +2281,9 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) } } + if (phys_enc->hw_pp && phys_enc->hw_pp->ops.setup_dither) + phys_enc->hw_pp->ops.setup_dither(phys_enc->hw_pp, NULL); + /* reset the merge 3D HW block */ if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) { phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c index 657200401f57..cec6d4e8baec 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c @@ -52,6 +52,7 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, u32 slice_last_group_size; u32 det_thresh_flatness; bool is_cmd_mode = !(mode & DSC_MODE_VIDEO); + bool input_10_bits = dsc->bits_per_component == 10; DPU_REG_WRITE(c, DSC_COMMON_MODE, mode); @@ -68,7 +69,7 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc, data |= (dsc->line_buf_depth << 3); data |= (dsc->simple_422 << 2); data |= (dsc->convert_rgb << 1); - data |= dsc->bits_per_component; + data |= input_10_bits; DPU_REG_WRITE(c, DSC_ENC, data); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c index ad19330de61a..562a3f4c5238 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_top.c @@ -272,7 +272,7 @@ static void _setup_mdp_ops(struct dpu_hw_mdp_ops *ops, if (cap & BIT(DPU_MDP_VSYNC_SEL)) ops->setup_vsync_source = dpu_hw_setup_vsync_sel; - else + else if (!(cap & BIT(DPU_MDP_PERIPH_0_REMOVED))) ops->setup_vsync_source = dpu_hw_setup_wd_timer; ops->get_safe_status = dpu_hw_get_safe_status; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 098abc2c0003..af3e541f60c3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -1164,7 +1164,6 @@ int dpu_assign_plane_resources(struct dpu_global_state *global_state, unsigned int num_planes) { unsigned int i; - int ret; for (i = 0; i < num_planes; i++) { struct drm_plane_state *plane_state = states[i]; @@ -1173,13 +1172,13 @@ int dpu_assign_plane_resources(struct dpu_global_state *global_state, !plane_state->visible) continue; - ret = dpu_plane_virtual_assign_resources(crtc, global_state, + int ret = dpu_plane_virtual_assign_resources(crtc, global_state, state, plane_state); if (ret) - break; + return ret; } - return ret; + return 0; } static void dpu_plane_flush_csc(struct dpu_plane *pdpu, struct dpu_sw_pipe *pipe) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 24dd37f1682b..3898850739ab 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -930,16 +930,17 @@ enum drm_mode_status msm_dp_bridge_mode_valid(struct drm_bridge *bridge, return -EINVAL; } - if (mode->clock > DP_MAX_PIXEL_CLK_KHZ) - return MODE_CLOCK_HIGH; - msm_dp_display = container_of(dp, struct msm_dp_display_private, msm_dp_display); link_info = &msm_dp_display->panel->link_info; - if (drm_mode_is_420_only(&dp->connector->display_info, mode) && - msm_dp_display->panel->vsc_sdp_supported) + if ((drm_mode_is_420_only(&dp->connector->display_info, mode) && + msm_dp_display->panel->vsc_sdp_supported) || + msm_dp_wide_bus_available(dp)) mode_pclk_khz /= 2; + if (mode_pclk_khz > DP_MAX_PIXEL_CLK_KHZ) + return MODE_CLOCK_HIGH; + mode_bpp = dp->connector->display_info.bpc * num_components; if (!mode_bpp) mode_bpp = default_bpp; diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c index d3e241ea6941..16b7913d1eef 100644 --- a/drivers/gpu/drm/msm/dp/dp_drm.c +++ b/drivers/gpu/drm/msm/dp/dp_drm.c @@ -257,7 +257,10 @@ static enum drm_mode_status msm_edp_bridge_mode_valid(struct drm_bridge *bridge, return -EINVAL; } - if (mode->clock > DP_MAX_PIXEL_CLK_KHZ) + if (msm_dp_wide_bus_available(dp)) + mode_pclk_khz /= 2; + + if (mode_pclk_khz > DP_MAX_PIXEL_CLK_KHZ) return MODE_CLOCK_HIGH; /* diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c index 031446c87dae..798168180c1a 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_7nm.c @@ -83,6 +83,9 @@ struct dsi_pll_7nm { /* protects REG_DSI_7nm_PHY_CMN_CLK_CFG0 register */ spinlock_t postdiv_lock; + /* protects REG_DSI_7nm_PHY_CMN_CLK_CFG1 register */ + spinlock_t pclk_mux_lock; + struct pll_7nm_cached_state cached_state; struct dsi_pll_7nm *slave; @@ -372,22 +375,41 @@ static void dsi_pll_enable_pll_bias(struct dsi_pll_7nm *pll) ndelay(250); } -static void dsi_pll_disable_global_clk(struct dsi_pll_7nm *pll) +static void dsi_pll_cmn_clk_cfg0_write(struct dsi_pll_7nm *pll, u32 val) { + unsigned long flags; + + spin_lock_irqsave(&pll->postdiv_lock, flags); + writel(val, pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG0); + spin_unlock_irqrestore(&pll->postdiv_lock, flags); +} + +static void dsi_pll_cmn_clk_cfg1_update(struct dsi_pll_7nm *pll, u32 mask, + u32 val) +{ + unsigned long flags; u32 data; + spin_lock_irqsave(&pll->pclk_mux_lock, flags); data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); - writel(data & ~BIT(5), pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + data &= ~mask; + data |= val & mask; + + writel(data, pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + spin_unlock_irqrestore(&pll->pclk_mux_lock, flags); +} + +static void dsi_pll_disable_global_clk(struct dsi_pll_7nm *pll) +{ + dsi_pll_cmn_clk_cfg1_update(pll, DSI_7nm_PHY_CMN_CLK_CFG1_CLK_EN, 0); } static void dsi_pll_enable_global_clk(struct dsi_pll_7nm *pll) { - u32 data; + u32 cfg_1 = DSI_7nm_PHY_CMN_CLK_CFG1_CLK_EN | DSI_7nm_PHY_CMN_CLK_CFG1_CLK_EN_SEL; writel(0x04, pll->phy->base + REG_DSI_7nm_PHY_CMN_CTRL_3); - - data = readl(pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); - writel(data | BIT(5) | BIT(4), pll->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + dsi_pll_cmn_clk_cfg1_update(pll, cfg_1, cfg_1); } static void dsi_pll_phy_dig_reset(struct dsi_pll_7nm *pll) @@ -565,7 +587,6 @@ static int dsi_7nm_pll_restore_state(struct msm_dsi_phy *phy) { struct dsi_pll_7nm *pll_7nm = to_pll_7nm(phy->vco_hw); struct pll_7nm_cached_state *cached = &pll_7nm->cached_state; - void __iomem *phy_base = pll_7nm->phy->base; u32 val; int ret; @@ -574,13 +595,10 @@ static int dsi_7nm_pll_restore_state(struct msm_dsi_phy *phy) val |= cached->pll_out_div; writel(val, pll_7nm->phy->pll_base + REG_DSI_7nm_PHY_PLL_PLL_OUTDIV_RATE); - writel(cached->bit_clk_div | (cached->pix_clk_div << 4), - phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG0); - - val = readl(phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); - val &= ~0x3; - val |= cached->pll_mux; - writel(val, phy_base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + dsi_pll_cmn_clk_cfg0_write(pll_7nm, + DSI_7nm_PHY_CMN_CLK_CFG0_DIV_CTRL_3_0(cached->bit_clk_div) | + DSI_7nm_PHY_CMN_CLK_CFG0_DIV_CTRL_7_4(cached->pix_clk_div)); + dsi_pll_cmn_clk_cfg1_update(pll_7nm, 0x3, cached->pll_mux); ret = dsi_pll_7nm_vco_set_rate(phy->vco_hw, pll_7nm->vco_current_rate, @@ -599,7 +617,6 @@ static int dsi_7nm_pll_restore_state(struct msm_dsi_phy *phy) static int dsi_7nm_set_usecase(struct msm_dsi_phy *phy) { struct dsi_pll_7nm *pll_7nm = to_pll_7nm(phy->vco_hw); - void __iomem *base = phy->base; u32 data = 0x0; /* internal PLL */ DBG("DSI PLL%d", pll_7nm->phy->id); @@ -618,7 +635,8 @@ static int dsi_7nm_set_usecase(struct msm_dsi_phy *phy) } /* set PLL src */ - writel(data << 2, base + REG_DSI_7nm_PHY_CMN_CLK_CFG1); + dsi_pll_cmn_clk_cfg1_update(pll_7nm, DSI_7nm_PHY_CMN_CLK_CFG1_BITCLK_SEL__MASK, + DSI_7nm_PHY_CMN_CLK_CFG1_BITCLK_SEL(data)); return 0; } @@ -733,7 +751,7 @@ static int pll_7nm_register(struct dsi_pll_7nm *pll_7nm, struct clk_hw **provide pll_by_2_bit, }), 2, 0, pll_7nm->phy->base + REG_DSI_7nm_PHY_CMN_CLK_CFG1, - 0, 1, 0, NULL); + 0, 1, 0, &pll_7nm->pclk_mux_lock); if (IS_ERR(hw)) { ret = PTR_ERR(hw); goto fail; @@ -778,6 +796,7 @@ static int dsi_pll_7nm_init(struct msm_dsi_phy *phy) pll_7nm_list[phy->id] = pll_7nm; spin_lock_init(&pll_7nm->postdiv_lock); + spin_lock_init(&pll_7nm->pclk_mux_lock); pll_7nm->phy = phy; diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index fee31680a6d5..a65077855201 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -537,15 +537,12 @@ static inline int align_pitch(int width, int bpp) static inline unsigned long timeout_to_jiffies(const ktime_t *timeout) { ktime_t now = ktime_get(); - s64 remaining_jiffies; - if (ktime_compare(*timeout, now) < 0) { - remaining_jiffies = 0; - } else { - ktime_t rem = ktime_sub(*timeout, now); - remaining_jiffies = ktime_divns(rem, NSEC_PER_SEC / HZ); - } + if (ktime_compare(*timeout, now) <= 0) + return 0; + ktime_t rem = ktime_sub(*timeout, now); + s64 remaining_jiffies = ktime_divns(rem, NSEC_PER_SEC / HZ); return clamp(remaining_jiffies, 1LL, (s64)INT_MAX); } diff --git a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml index d54b72f92449..35f7f40e405b 100644 --- a/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml +++ b/drivers/gpu/drm/msm/registers/display/dsi_phy_7nm.xml @@ -9,8 +9,15 @@ xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> <reg32 offset="0x00004" name="REVISION_ID1"/> <reg32 offset="0x00008" name="REVISION_ID2"/> <reg32 offset="0x0000c" name="REVISION_ID3"/> - <reg32 offset="0x00010" name="CLK_CFG0"/> - <reg32 offset="0x00014" name="CLK_CFG1"/> + <reg32 offset="0x00010" name="CLK_CFG0"> + <bitfield name="DIV_CTRL_3_0" low="0" high="3" type="uint"/> + <bitfield name="DIV_CTRL_7_4" low="4" high="7" type="uint"/> + </reg32> + <reg32 offset="0x00014" name="CLK_CFG1"> + <bitfield name="CLK_EN" pos="5" type="boolean"/> + <bitfield name="CLK_EN_SEL" pos="4" type="boolean"/> + <bitfield name="BITCLK_SEL" low="2" high="3" type="uint"/> + </reg32> <reg32 offset="0x00018" name="GLBL_CTRL"/> <reg32 offset="0x0001c" name="RBUF_CTRL"/> <reg32 offset="0x00020" name="VREG_CTRL_0"/> diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 8d5c9c74cbb9..eac0d1d2dbda 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -775,7 +775,6 @@ nouveau_connector_force(struct drm_connector *connector) if (!nv_encoder) { NV_ERROR(drm, "can't find encoder to force %s on!\n", connector->name); - connector->status = connector_status_disconnected; return; } diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index b4da82ddbb6b..8ea98f06d39a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -590,6 +590,7 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, unsigned long timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); struct mm_struct *mm = svmm->notifier.mm; + struct folio *folio; struct page *page; unsigned long start = args->p.addr; unsigned long notifier_seq; @@ -616,12 +617,16 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, ret = -EINVAL; goto out; } + folio = page_folio(page); mutex_lock(&svmm->mutex); if (!mmu_interval_read_retry(¬ifier->notifier, notifier_seq)) break; mutex_unlock(&svmm->mutex); + + folio_unlock(folio); + folio_put(folio); } /* Map the page on the GPU. */ @@ -637,8 +642,8 @@ static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); mutex_unlock(&svmm->mutex); - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); out: mmu_interval_notifier_remove(¬ifier->notifier); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c index a6f410ba60bc..d393bc540f86 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c @@ -75,7 +75,7 @@ gp10b_pmu_acr = { .bootstrap_multiple_falcons = gp10b_pmu_acr_bootstrap_multiple_falcons, }; -#if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC) +#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC) MODULE_FIRMWARE("nvidia/gp10b/pmu/desc.bin"); MODULE_FIRMWARE("nvidia/gp10b/pmu/image.bin"); MODULE_FIRMWARE("nvidia/gp10b/pmu/sig.bin"); diff --git a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c index 45d09e6fa667..7d68a8acfe2e 100644 --- a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c +++ b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c @@ -109,13 +109,13 @@ static int jadard_prepare(struct drm_panel *panel) if (jadard->desc->lp11_to_reset_delay_ms) msleep(jadard->desc->lp11_to_reset_delay_ms); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(5); - gpiod_set_value(jadard->reset, 0); + gpiod_set_value(jadard->reset, 1); msleep(10); - gpiod_set_value(jadard->reset, 1); + gpiod_set_value(jadard->reset, 0); msleep(130); ret = jadard->desc->init(jadard); @@ -1130,7 +1130,7 @@ static int jadard_dsi_probe(struct mipi_dsi_device *dsi) dsi->format = desc->format; dsi->lanes = desc->lanes; - jadard->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); + jadard->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH); if (IS_ERR(jadard->reset)) { DRM_DEV_ERROR(&dsi->dev, "failed to get our reset GPIO\n"); return PTR_ERR(jadard->reset); diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index d5dcd3d1b33a..08136e790ca0 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -802,6 +802,7 @@ static void panthor_query_group_priorities_info(struct drm_file *file, { int prio; + memset(arg, 0, sizeof(*arg)); for (prio = PANTHOR_GROUP_PRIORITY_REALTIME; prio >= 0; prio--) { if (!group_priority_permit(file, prio)) arg->allowed_mask |= BIT(prio); diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index b976a5e9aef5..23ecc00accb2 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -70,10 +70,17 @@ static int light_up_connector(struct kunit *test, state = drm_kunit_helper_atomic_state_alloc(test, drm, ctx); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, state); +retry: conn_state = drm_atomic_get_connector_state(state, connector); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); ret = drm_atomic_set_crtc_for_connector(conn_state, crtc); + if (ret == -EDEADLK) { + drm_atomic_state_clear(state); + ret = drm_modeset_backoff(ctx); + if (!ret) + goto retry; + } KUNIT_EXPECT_EQ(test, ret, 0); crtc_state = drm_atomic_get_crtc_state(state, crtc); @@ -282,15 +289,16 @@ static void drm_test_check_broadcast_rgb_crtc_mode_changed(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; + preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -345,15 +353,16 @@ static void drm_test_check_broadcast_rgb_crtc_mode_not_changed(struct kunit *tes 8); KUNIT_ASSERT_NOT_NULL(test, priv); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; + preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -408,18 +417,18 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -474,7 +483,6 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode_vic_1(struct kunit *test) mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); - drm = &priv->drm; crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -520,18 +528,18 @@ static void drm_test_check_broadcast_rgb_full_cea_mode(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -588,7 +596,6 @@ static void drm_test_check_broadcast_rgb_full_cea_mode_vic_1(struct kunit *test) mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); - drm = &priv->drm; crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -636,18 +643,18 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; KUNIT_ASSERT_TRUE(test, conn->display_info.is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_NE(test, drm_match_cea_mode(preferred), 1); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -704,7 +711,6 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode_vic_1(struct kunit *te mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); - drm = &priv->drm; crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -754,20 +760,20 @@ static void drm_test_check_output_bpc_crtc_mode_changed(struct kunit *test) 10); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); KUNIT_ASSERT_GT(test, ret, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -828,20 +834,20 @@ static void drm_test_check_output_bpc_crtc_mode_not_changed(struct kunit *test) 10); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); KUNIT_ASSERT_GT(test, ret, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -899,6 +905,8 @@ static void drm_test_check_output_bpc_dvi(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_dvi_1080p, @@ -908,14 +916,12 @@ static void drm_test_check_output_bpc_dvi(struct kunit *test) info = &conn->display_info; KUNIT_ASSERT_FALSE(test, info->is_hdmi); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -946,21 +952,21 @@ static void drm_test_check_tmds_char_rate_rgb_8bpc(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_200mhz)); KUNIT_ASSERT_GT(test, ret, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -993,21 +999,21 @@ static void drm_test_check_tmds_char_rate_rgb_10bpc(struct kunit *test) 10); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz)); KUNIT_ASSERT_GT(test, ret, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1040,21 +1046,21 @@ static void drm_test_check_tmds_char_rate_rgb_12bpc(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz)); KUNIT_ASSERT_GT(test, ret, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1091,15 +1097,16 @@ static void drm_test_check_hdmi_funcs_reject_rate(struct kunit *test) 8); KUNIT_ASSERT_NOT_NULL(test, priv); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; + preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_ASSERT_EQ(test, ret, 0); @@ -1147,6 +1154,8 @@ static void drm_test_check_max_tmds_rate_bpc_fallback(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, @@ -1157,9 +1166,6 @@ static void drm_test_check_max_tmds_rate_bpc_fallback(struct kunit *test) KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); @@ -1170,8 +1176,9 @@ static void drm_test_check_max_tmds_rate_bpc_fallback(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 10, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1216,6 +1223,8 @@ static void drm_test_check_max_tmds_rate_format_fallback(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, @@ -1226,9 +1235,6 @@ static void drm_test_check_max_tmds_rate_format_fallback(struct kunit *test) KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); KUNIT_ASSERT_FALSE(test, preferred->flags & DRM_MODE_FLAG_DBLCLK); @@ -1242,8 +1248,9 @@ static void drm_test_check_max_tmds_rate_format_fallback(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1290,9 +1297,6 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test) KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1); KUNIT_ASSERT_NOT_NULL(test, mode); @@ -1306,7 +1310,9 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test) rate = mode->clock * 1500; KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + crtc = priv->crtc; ret = light_up_connector(test, drm, crtc, conn, mode, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1340,6 +1346,8 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test) 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, @@ -1350,9 +1358,6 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test) KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1371,8 +1376,9 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test) rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1407,6 +1413,8 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_200mhz, @@ -1417,9 +1425,6 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1438,8 +1443,9 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_YUV422); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1473,6 +1479,8 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test 8); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, @@ -1483,9 +1491,6 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1496,8 +1501,9 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); @@ -1533,6 +1539,8 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes 12); KUNIT_ASSERT_NOT_NULL(test, priv); + drm = &priv->drm; + crtc = priv->crtc; conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_340mhz, @@ -1543,9 +1551,6 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes KUNIT_ASSERT_TRUE(test, info->is_hdmi); KUNIT_ASSERT_GT(test, info->max_tmds_clock, 0); - ctx = drm_kunit_helper_acquire_ctx_alloc(test); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); - preferred = find_preferred_mode(conn); KUNIT_ASSERT_NOT_NULL(test, preferred); @@ -1556,8 +1561,9 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes rate = drm_hdmi_compute_mode_clock(preferred, 12, HDMI_COLORSPACE_RGB); KUNIT_ASSERT_LT(test, rate, info->max_tmds_clock * 1000); - drm = &priv->drm; - crtc = priv->crtc; + ctx = drm_kunit_helper_acquire_ctx_alloc(test); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); + ret = light_up_connector(test, drm, crtc, conn, preferred, ctx); KUNIT_EXPECT_EQ(test, ret, 0); diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index b20ac1705726..fa269d279e25 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -67,7 +67,7 @@ static u16 lerp_u16(u16 a, u16 b, s64 t) s64 delta = drm_fixp_mul(b_fp - a_fp, t); - return drm_fixp2int(a_fp + delta); + return drm_fixp2int_round(a_fp + delta); } static s64 get_lut_index(const struct vkms_color_lut *lut, u16 channel_value) diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h index d86219dedde2..b732c89816df 100644 --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h @@ -53,7 +53,6 @@ #define RING_CTL(base) XE_REG((base) + 0x3c) #define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ -#define RING_CTL_SIZE(size) ((size) - PAGE_SIZE) /* in bytes -> pages */ #define RING_START_UDW(base) XE_REG((base) + 0x48) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index a49561e9f3c3..a79ad2da070c 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -51,6 +51,10 @@ /* Common to all OA units */ #define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9) #define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) +#define OAG_OACONTROL_USED_BITS \ + (OAG_OACONTROL_OA_PES_DISAG_EN | OAG_OACONTROL_OA_CCS_SELECT_MASK | \ + OAG_OACONTROL_OA_COUNTER_SEL_MASK | OAG_OACONTROL_OA_COUNTER_ENABLE | \ + OA_OACONTROL_REPORT_BC_MASK | OA_OACONTROL_COUNTER_SIZE_MASK) #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) #define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) @@ -78,6 +82,8 @@ #define OAM_CONTEXT_CONTROL_OFFSET (0x1bc) #define OAM_CONTROL_OFFSET (0x194) #define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAM_OACONTROL_USED_BITS \ + (OAM_CONTROL_COUNTER_SEL_MASK | OAG_OACONTROL_OA_COUNTER_ENABLE) #define OAM_DEBUG_OFFSET (0x198) #define OAM_STATUS_OFFSET (0x19c) #define OAM_MMIO_TRG_OFFSET (0x1d0) diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 81dc7795c065..39fe485d2085 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -119,11 +119,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, drm_puts(&p, "\n**** GuC CT ****\n"); xe_guc_ct_snapshot_print(ss->guc.ct, &p); - /* - * Don't add a new section header here because the mesa debug decoder - * tool expects the context information to be in the 'GuC CT' section. - */ - /* drm_puts(&p, "\n**** Contexts ****\n"); */ + drm_puts(&p, "\n**** Contexts ****\n"); xe_guc_exec_queue_snapshot_print(ss->ge, &p); drm_puts(&p, "\n**** Job ****\n"); @@ -395,42 +391,34 @@ int xe_devcoredump_init(struct xe_device *xe) /** * xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85 * - * The output is split to multiple lines because some print targets, e.g. dmesg - * cannot handle arbitrarily long lines. Note also that printing to dmesg in - * piece-meal fashion is not possible, each separate call to drm_puts() has a - * line-feed automatically added! Therefore, the entire output line must be - * constructed in a local buffer first, then printed in one atomic output call. + * The output is split into multiple calls to drm_puts() because some print + * targets, e.g. dmesg, cannot handle arbitrarily long lines. These targets may + * add newlines, as is the case with dmesg: each drm_puts() call creates a + * separate line. * * There is also a scheduler yield call to prevent the 'task has been stuck for * 120s' kernel hang check feature from firing when printing to a slow target * such as dmesg over a serial port. * - * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down. - * * @p: the printer object to output to * @prefix: optional prefix to add to output string + * @suffix: optional suffix to add at the end. 0 disables it and is + * not added to the output, which is useful when using multiple calls + * to dump data to @p * @blob: the Binary Large OBject to dump out * @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32) * @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32) */ -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size) { const u32 *blob32 = (const u32 *)blob; char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0; - /* - * Splitting blobs across multiple lines is not compatible with the mesa - * debug decoder tool. Note that even dropping the explicit '\n' below - * doesn't help because the GuC log is so big some underlying implementation - * still splits the lines at 512K characters. So just bail completely for - * the moment. - */ - return; - #define DMESG_MAX_LINE_LEN 800 -#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */ + /* Always leave space for the suffix char and the \0 */ +#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */ if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); @@ -462,7 +450,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos); if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; drm_puts(p, line_buff); @@ -474,10 +461,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } } + if (suffix) + line_buff[line_pos++] = suffix; + if (line_pos) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; - drm_puts(p, line_buff); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index 6a17e6d60102..5391a80a4d1b 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -29,7 +29,7 @@ static inline int xe_devcoredump_init(struct xe_device *xe) } #endif -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size); #endif diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 63f30b6df70b..2d4874d2b922 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -135,8 +135,8 @@ void xe_drm_client_add_bo(struct xe_drm_client *client, XE_WARN_ON(bo->client); XE_WARN_ON(!list_empty(&bo->client_link)); - spin_lock(&client->bos_lock); bo->client = xe_drm_client_get(client); + spin_lock(&client->bos_lock); list_add_tail(&bo->client_link, &client->bos_list); spin_unlock(&client->bos_lock); } diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 26e64530ada2..5d6fb79957b6 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -532,8 +532,10 @@ static int all_fw_domain_init(struct xe_gt *gt) if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt)) xe_lmtt_init_hw(>_to_tile(gt)->sriov.pf.lmtt); - if (IS_SRIOV_PF(gt_to_xe(gt))) + if (IS_SRIOV_PF(gt_to_xe(gt))) { + xe_gt_sriov_pf_init(gt); xe_gt_sriov_pf_init_hw(gt); + } xe_force_wake_put(gt_to_fw(gt), fw_ref); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c index e71fc3d2bda2..6f906c8e8108 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c @@ -68,6 +68,19 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +/** + * xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF. + * @gt: the &xe_gt to initialize + * + * Late one-time initialization of the PF data. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_init(struct xe_gt *gt) +{ + return xe_gt_sriov_pf_migration_init(gt); +} + static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe) { return GRAPHICS_VERx100(xe) == 1200; @@ -90,7 +103,6 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) pf_enable_ggtt_guest_update(gt); xe_gt_sriov_pf_service_update(gt); - xe_gt_sriov_pf_migration_init(gt); } static u32 pf_get_vf_regs_stride(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h index 96fab779a906..f474509411c0 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h @@ -10,6 +10,7 @@ struct xe_gt; #ifdef CONFIG_PCI_IOV int xe_gt_sriov_pf_init_early(struct xe_gt *gt); +int xe_gt_sriov_pf_init(struct xe_gt *gt); void xe_gt_sriov_pf_init_hw(struct xe_gt *gt); void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid); void xe_gt_sriov_pf_restart(struct xe_gt *gt); @@ -19,6 +20,11 @@ static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt) return 0; } +static inline int xe_gt_sriov_pf_init(struct xe_gt *gt) +{ + return 0; +} + static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt) { } diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8b65c5e959cc..72ad576fc18e 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -1723,8 +1723,11 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, drm_printf(p, "\tg2h outstanding: %d\n", snapshot->g2h_outstanding); - if (snapshot->ctb) - xe_print_blob_ascii85(p, "CTB data", snapshot->ctb, 0, snapshot->ctb_size); + if (snapshot->ctb) { + drm_printf(p, "[CTB].length: 0x%zx\n", snapshot->ctb_size); + xe_print_blob_ascii85(p, "[CTB].data", '\n', + snapshot->ctb, 0, snapshot->ctb_size); + } } else { drm_puts(p, "CT disabled\n"); } diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c index df4cfb698cdb..0ca3056d8bd3 100644 --- a/drivers/gpu/drm/xe/xe_guc_log.c +++ b/drivers/gpu/drm/xe/xe_guc_log.c @@ -208,11 +208,14 @@ void xe_guc_log_snapshot_print(struct xe_guc_log_snapshot *snapshot, struct drm_ drm_printf(p, "GuC timestamp: 0x%08llX [%llu]\n", snapshot->stamp, snapshot->stamp); drm_printf(p, "Log level: %u\n", snapshot->level); + drm_printf(p, "[LOG].length: 0x%zx\n", snapshot->size); remain = snapshot->size; for (i = 0; i < snapshot->num_chunks; i++) { size_t size = min(GUC_LOG_CHUNK_SIZE, remain); + const char *prefix = i ? NULL : "[LOG].data"; + char suffix = i == snapshot->num_chunks - 1 ? '\n' : 0; - xe_print_blob_ascii85(p, i ? NULL : "Log data", snapshot->copy[i], 0, size); + xe_print_blob_ascii85(p, prefix, suffix, snapshot->copy[i], 0, size); remain -= size; } } diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 913c74d6e2ae..b6a2dd742ebd 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1248,6 +1248,8 @@ static void __guc_exec_queue_fini_async(struct work_struct *w) if (xe_exec_queue_is_lr(q)) cancel_work_sync(&ge->lr_tdr); + /* Confirm no work left behind accessing device structures */ + cancel_delayed_work_sync(&ge->sched.base.work_tdr); release_guc_id(guc, q); xe_sched_entity_fini(&ge->entity); xe_sched_fini(&ge->sched); diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index 32f5a67a917b..08552ee3fb94 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -757,19 +757,7 @@ int xe_irq_install(struct xe_device *xe) xe_irq_postinstall(xe); - err = devm_add_action_or_reset(xe->drm.dev, irq_uninstall, xe); - if (err) - goto free_irq_handler; - - return 0; - -free_irq_handler: - if (xe_device_has_msix(xe)) - xe_irq_msix_free(xe); - else - xe_irq_msi_free(xe); - - return err; + return devm_add_action_or_reset(xe->drm.dev, irq_uninstall, xe); } static void xe_irq_msi_synchronize_irq(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index eeb96b5f49e2..eb6cd91e1e22 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -237,7 +237,6 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) u32 tail, hw_tail, partial_report_size, available; int report_size = stream->oa_buffer.format->size; unsigned long flags; - bool pollin; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); @@ -282,11 +281,11 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) stream->oa_buffer.tail = tail; available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head); - pollin = available >= stream->wait_num_reports * report_size; + stream->pollin = available >= stream->wait_num_reports * report_size; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - return pollin; + return stream->pollin; } static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) @@ -294,10 +293,8 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) struct xe_oa_stream *stream = container_of(hrtimer, typeof(*stream), poll_check_timer); - if (xe_oa_buffer_check_unlocked(stream)) { - stream->pollin = true; + if (xe_oa_buffer_check_unlocked(stream)) wake_up(&stream->poll_wq); - } hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); @@ -452,6 +449,12 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream) return val; } +static u32 __oactrl_used_bits(struct xe_oa_stream *stream) +{ + return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? + OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; +} + static void xe_oa_enable(struct xe_oa_stream *stream) { const struct xe_oa_format *format = stream->oa_buffer.format; @@ -472,14 +475,14 @@ static void xe_oa_enable(struct xe_oa_stream *stream) stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) val |= OAG_OACONTROL_OA_PES_DISAG_EN; - xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val); + xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val); } static void xe_oa_disable(struct xe_oa_stream *stream) { struct xe_mmio *mmio = &stream->gt->mmio; - xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0); + xe_mmio_rmw32(mmio, __oa_regs(stream)->oa_ctrl, __oactrl_used_bits(stream), 0); if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl, OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) drm_err(&stream->oa->xe->drm, @@ -1686,7 +1689,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; stream->sample = param->sample; - stream->periodic = param->period_exponent > 0; + stream->periodic = param->period_exponent >= 0; stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; stream->wait_num_reports = param->wait_num_reports; @@ -1967,6 +1970,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f } param.xef = xef; + param.period_exponent = -1; ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, ¶m); if (ret) return ret; @@ -2021,7 +2025,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - if (param.period_exponent > 0) { + if (param.period_exponent >= 0) { u64 oa_period, oa_freq_hz; /* Requesting samples from OAG buffer is a privileged operation */ @@ -2534,6 +2538,8 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) u->type = DRM_XE_OA_UNIT_TYPE_OAM; } + xe_mmio_write32(>->mmio, u->regs.oa_ctrl, 0); + /* Ensure MMIO trigger remains disabled till there is a stream */ xe_mmio_write32(>->mmio, u->regs.oa_debug, oag_configure_mmio_trigger(NULL, false)); diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c index 423856cc18d4..d414421f8c13 100644 --- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c @@ -57,12 +57,35 @@ bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe) return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe); } +static u32 get_wopcm_size(struct xe_device *xe) +{ + u32 wopcm_size; + u64 val; + + val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED); + val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val); + + switch (val) { + case 0x5 ... 0x6: + val--; + fallthrough; + case 0x0 ... 0x3: + wopcm_size = (1U << val) * SZ_1M; + break; + default: + WARN(1, "Missing case wopcm_size=%llx\n", val); + wopcm_size = 0; + } + + return wopcm_size; +} + static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct xe_tile *tile = xe_device_get_root_tile(xe); struct xe_mmio *mmio = xe_root_tile_mmio(xe); struct pci_dev *pdev = to_pci_dev(xe->drm.dev); - u64 stolen_size; + u64 stolen_size, wopcm_size; u64 tile_offset; u64 tile_size; @@ -74,7 +97,13 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base)) return 0; + /* Carve out the top of DSM as it contains the reserved WOPCM region */ + wopcm_size = get_wopcm_size(xe); + if (drm_WARN_ON(&xe->drm, !wopcm_size)) + return 0; + stolen_size = tile_size - mgr->stolen_base; + stolen_size -= wopcm_size; /* Verify usage fits in the actual resource available */ if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR)) @@ -89,29 +118,6 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) return ALIGN_DOWN(stolen_size, SZ_1M); } -static u32 get_wopcm_size(struct xe_device *xe) -{ - u32 wopcm_size; - u64 val; - - val = xe_mmio_read64_2x32(xe_root_tile_mmio(xe), STOLEN_RESERVED); - val = REG_FIELD_GET64(WOPCM_SIZE_MASK, val); - - switch (val) { - case 0x5 ... 0x6: - val--; - fallthrough; - case 0x0 ... 0x3: - wopcm_size = (1U << val) * SZ_1M; - break; - default: - WARN(1, "Missing case wopcm_size=%llx\n", val); - wopcm_size = 0; - } - - return wopcm_size; -} - static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr) { struct pci_dev *pdev = to_pci_dev(xe->drm.dev); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 690330352d4c..30259eba450b 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -666,20 +666,33 @@ int xe_vm_userptr_pin(struct xe_vm *vm) /* Collect invalidated userptrs */ spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&vm->userptr.repin_list)); list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated, userptr.invalidate_link) { list_del_init(&uvma->userptr.invalidate_link); - list_move_tail(&uvma->userptr.repin_link, - &vm->userptr.repin_list); + list_add_tail(&uvma->userptr.repin_link, + &vm->userptr.repin_list); } spin_unlock(&vm->userptr.invalidated_lock); - /* Pin and move to temporary list */ + /* Pin and move to bind list */ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, userptr.repin_link) { err = xe_vma_userptr_pin_pages(uvma); if (err == -EFAULT) { list_del_init(&uvma->userptr.repin_link); + /* + * We might have already done the pin once already, but + * then had to retry before the re-bind happened, due + * some other condition in the caller, but in the + * meantime the userptr got dinged by the notifier such + * that we need to revalidate here, but this time we hit + * the EFAULT. In such a case make sure we remove + * ourselves from the rebind list to avoid going down in + * flames. + */ + if (!list_empty(&uvma->vma.combined_links.rebind)) + list_del_init(&uvma->vma.combined_links.rebind); /* Wait for pending binds */ xe_vm_lock(vm, false); @@ -690,10 +703,10 @@ int xe_vm_userptr_pin(struct xe_vm *vm) err = xe_vm_invalidate_vma(&uvma->vma); xe_vm_unlock(vm); if (err) - return err; + break; } else { - if (err < 0) - return err; + if (err) + break; list_del_init(&uvma->userptr.repin_link); list_move_tail(&uvma->vma.combined_links.rebind, @@ -701,7 +714,19 @@ int xe_vm_userptr_pin(struct xe_vm *vm) } } - return 0; + if (err) { + down_write(&vm->userptr.notifier_lock); + spin_lock(&vm->userptr.invalidated_lock); + list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list, + userptr.repin_link) { + list_del_init(&uvma->userptr.repin_link); + list_move_tail(&uvma->userptr.invalidate_link, + &vm->userptr.invalidated); + } + spin_unlock(&vm->userptr.invalidated_lock); + up_write(&vm->userptr.notifier_lock); + } + return err; } /** @@ -1066,6 +1091,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); spin_lock(&vm->userptr.invalidated_lock); + xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link)); list_del(&to_userptr_vma(vma)->userptr.invalidate_link); spin_unlock(&vm->userptr.invalidated_lock); } else if (!xe_vma_is_null(vma)) { diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 7b1d091f3c09..46cae925b095 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -619,6 +619,8 @@ static int host1x_probe(struct platform_device *pdev) goto free_contexts; } + mutex_init(&host->intr_mutex); + pm_runtime_enable(&pdev->dev); err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index b3285dd10180..f77a678949e9 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -104,8 +104,6 @@ int host1x_intr_init(struct host1x *host) unsigned int id; int i, err; - mutex_init(&host->intr_mutex); - for (id = 0; id < host1x_syncpt_nb_pts(host); ++id) { struct host1x_syncpt *syncpt = &host->syncpt[id]; diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index b53eb569bd49..dfc245867a46 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -570,6 +570,8 @@ config HID_LED config HID_LENOVO tristate "Lenovo / Thinkpad devices" + depends on ACPI + select ACPI_PLATFORM_PROFILE select NEW_LEDS select LEDS_CLASS help @@ -1167,7 +1169,8 @@ config HID_TOPRE tristate "Topre REALFORCE keyboards" depends on HID help - Say Y for N-key rollover support on Topre REALFORCE R2 108/87 key keyboards. + Say Y for N-key rollover support on Topre REALFORCE R2 108/87 key and + Topre REALFORCE R3S 87 key keyboards. config HID_THINGM tristate "ThingM blink(1) USB RGB LED" @@ -1374,10 +1377,6 @@ endmenu source "drivers/hid/bpf/Kconfig" -endif # HID - -source "drivers/hid/usbhid/Kconfig" - source "drivers/hid/i2c-hid/Kconfig" source "drivers/hid/intel-ish-hid/Kconfig" @@ -1388,4 +1387,10 @@ source "drivers/hid/surface-hid/Kconfig" source "drivers/hid/intel-thc-hid/Kconfig" +endif # HID + +# USB support may be used with HID disabled + +source "drivers/hid/usbhid/Kconfig" + endif # HID_SUPPORT diff --git a/drivers/hid/amd-sfh-hid/Kconfig b/drivers/hid/amd-sfh-hid/Kconfig index 329de5e12c1a..3291786a5ee6 100644 --- a/drivers/hid/amd-sfh-hid/Kconfig +++ b/drivers/hid/amd-sfh-hid/Kconfig @@ -5,7 +5,6 @@ menu "AMD SFH HID Support" config AMD_SFH_HID tristate "AMD Sensor Fusion Hub" - depends on HID depends on X86 help If you say yes to this option, support will be included for the diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 7e1ae2a2bcc2..49812a76b7ed 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -474,6 +474,7 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2015) table = magic_keyboard_2015_fn_keys; else if (hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 || + hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024 || hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 || hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021) table = apple2021_fn_keys; @@ -545,6 +546,9 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, } } + if (usage->hid == 0xc0301) /* Omoton KB066 quirk */ + code = KEY_F6; + if (usage->code != code) { input_event_with_scancode(input, usage->type, code, usage->hid, value); @@ -1150,6 +1154,10 @@ static const struct hid_device_id apple_devices[] = { .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024), + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, + { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024), + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), diff --git a/drivers/hid/hid-corsair-void.c b/drivers/hid/hid-corsair-void.c index 6ece56b850fc..56e858066c3c 100644 --- a/drivers/hid/hid-corsair-void.c +++ b/drivers/hid/hid-corsair-void.c @@ -553,7 +553,7 @@ static void corsair_void_battery_remove_work_handler(struct work_struct *work) static void corsair_void_battery_add_work_handler(struct work_struct *work) { struct corsair_void_drvdata *drvdata; - struct power_supply_config psy_cfg; + struct power_supply_config psy_cfg = {}; struct power_supply *new_supply; drvdata = container_of(work, struct corsair_void_drvdata, @@ -726,6 +726,7 @@ static void corsair_void_remove(struct hid_device *hid_dev) if (drvdata->battery) power_supply_unregister(drvdata->battery); + cancel_delayed_work_sync(&drvdata->delayed_status_work); cancel_delayed_work_sync(&drvdata->delayed_firmware_work); sysfs_remove_group(&hid_dev->dev.kobj, &corsair_void_attr_group); } diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index c448de53bf91..7e400624908e 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -184,6 +184,7 @@ #define USB_DEVICE_ID_APPLE_IRCONTROL4 0x8242 #define USB_DEVICE_ID_APPLE_IRCONTROL5 0x8243 #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 0x029c +#define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2024 0x0320 #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 0x029a #define USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021 0x029f #define USB_DEVICE_ID_APPLE_TOUCHBAR_BACKLIGHT 0x8102 @@ -1095,6 +1096,7 @@ #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3001 0x3001 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3003 0x3003 #define USB_DEVICE_ID_QUANTA_OPTICAL_TOUCH_3008 0x3008 +#define USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473 0x5473 #define I2C_VENDOR_ID_RAYDIUM 0x2386 #define I2C_PRODUCT_ID_RAYDIUM_4B33 0x4b33 @@ -1301,6 +1303,7 @@ #define USB_VENDOR_ID_TOPRE 0x0853 #define USB_DEVICE_ID_TOPRE_REALFORCE_R2_108 0x0148 #define USB_DEVICE_ID_TOPRE_REALFORCE_R2_87 0x0146 +#define USB_DEVICE_ID_TOPRE_REALFORCE_R3S_87 0x0313 #define USB_VENDOR_ID_TOPSEED 0x0766 #define USB_DEVICE_ID_TOPSEED_CYBERLINK 0x0204 diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 4d00bc4d656e..a7d9ca02779e 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -32,9 +32,7 @@ #include <linux/leds.h> #include <linux/workqueue.h> -#if IS_ENABLED(CONFIG_ACPI_PLATFORM_PROFILE) #include <linux/platform_profile.h> -#endif /* CONFIG_ACPI_PLATFORM_PROFILE */ #include "hid-ids.h" @@ -730,13 +728,10 @@ static int lenovo_raw_event_TP_X12_tab(struct hid_device *hdev, u32 raw_data) if (hdev->product == USB_DEVICE_ID_LENOVO_X12_TAB) { report_key_event(input, KEY_RFKILL); return 1; - } -#if IS_ENABLED(CONFIG_ACPI_PLATFORM_PROFILE) - else { + } else { platform_profile_cycle(); return 1; } -#endif /* CONFIG_ACPI_PLATFORM_PROFILE */ return 0; case TP_X12_RAW_HOTKEY_FN_F10: /* TAB1 has PICKUP Phone and TAB2 use Snipping tool*/ diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 82900857bfd8..e50887a6d22c 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1679,9 +1679,12 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) break; } - if (suffix) + if (suffix) { hi->input->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name, suffix); + if (!hi->input->name) + return -ENOMEM; + } return 0; } diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index e0bbf0c6345d..5d7a418ccdbe 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -891,6 +891,7 @@ static const struct hid_device_id hid_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DPAD) }, #endif { HID_USB_DEVICE(USB_VENDOR_ID_YEALINK, USB_DEVICE_ID_YEALINK_P1K_P4K_B2K) }, + { HID_USB_DEVICE(USB_VENDOR_ID_QUANTA, USB_DEVICE_ID_QUANTA_HP_5MP_CAMERA_5473) }, { } }; diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index af38fc8eb34f..c9e65e9088b3 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -313,6 +313,7 @@ struct steam_device { u16 rumble_left; u16 rumble_right; unsigned int sensor_timestamp_us; + struct work_struct unregister_work; }; static int steam_recv_report(struct steam_device *steam, @@ -1050,10 +1051,10 @@ static void steam_mode_switch_cb(struct work_struct *work) struct steam_device, mode_switch); unsigned long flags; bool client_opened; - steam->gamepad_mode = !steam->gamepad_mode; if (!lizard_mode) return; + steam->gamepad_mode = !steam->gamepad_mode; if (steam->gamepad_mode) steam_set_lizard_mode(steam, false); else { @@ -1072,6 +1073,31 @@ static void steam_mode_switch_cb(struct work_struct *work) } } +static void steam_work_unregister_cb(struct work_struct *work) +{ + struct steam_device *steam = container_of(work, struct steam_device, + unregister_work); + unsigned long flags; + bool connected; + bool opened; + + spin_lock_irqsave(&steam->lock, flags); + opened = steam->client_opened; + connected = steam->connected; + spin_unlock_irqrestore(&steam->lock, flags); + + if (connected) { + if (opened) { + steam_sensors_unregister(steam); + steam_input_unregister(steam); + } else { + steam_set_lizard_mode(steam, lizard_mode); + steam_input_register(steam); + steam_sensors_register(steam); + } + } +} + static bool steam_is_valve_interface(struct hid_device *hdev) { struct hid_report_enum *rep_enum; @@ -1117,8 +1143,7 @@ static int steam_client_ll_open(struct hid_device *hdev) steam->client_opened++; spin_unlock_irqrestore(&steam->lock, flags); - steam_sensors_unregister(steam); - steam_input_unregister(steam); + schedule_work(&steam->unregister_work); return 0; } @@ -1135,11 +1160,7 @@ static void steam_client_ll_close(struct hid_device *hdev) connected = steam->connected && !steam->client_opened; spin_unlock_irqrestore(&steam->lock, flags); - if (connected) { - steam_set_lizard_mode(steam, lizard_mode); - steam_input_register(steam); - steam_sensors_register(steam); - } + schedule_work(&steam->unregister_work); } static int steam_client_ll_raw_request(struct hid_device *hdev, @@ -1231,6 +1252,7 @@ static int steam_probe(struct hid_device *hdev, INIT_LIST_HEAD(&steam->list); INIT_WORK(&steam->rumble_work, steam_haptic_rumble_cb); steam->sensor_timestamp_us = 0; + INIT_WORK(&steam->unregister_work, steam_work_unregister_cb); /* * With the real steam controller interface, do not connect hidraw. @@ -1291,6 +1313,7 @@ err_cancel_work: cancel_work_sync(&steam->work_connect); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->rumble_work); + cancel_work_sync(&steam->unregister_work); return ret; } @@ -1307,6 +1330,7 @@ static void steam_remove(struct hid_device *hdev) cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->work_connect); cancel_work_sync(&steam->rumble_work); + cancel_work_sync(&steam->unregister_work); hid_destroy_device(steam->client_hdev); steam->client_hdev = NULL; steam->client_opened = 0; @@ -1593,13 +1617,13 @@ static void steam_do_deck_input_event(struct steam_device *steam, if (!(b9 & BIT(6)) && steam->did_mode_switch) { steam->did_mode_switch = false; - cancel_delayed_work_sync(&steam->mode_switch); + cancel_delayed_work(&steam->mode_switch); } else if (!steam->client_opened && (b9 & BIT(6)) && !steam->did_mode_switch) { steam->did_mode_switch = true; schedule_delayed_work(&steam->mode_switch, 45 * HZ / 100); } - if (!steam->gamepad_mode) + if (!steam->gamepad_mode && lizard_mode) return; lpad_touched = b10 & BIT(3); @@ -1669,7 +1693,7 @@ static void steam_do_deck_sensors_event(struct steam_device *steam, */ steam->sensor_timestamp_us += 4000; - if (!steam->gamepad_mode) + if (!steam->gamepad_mode && lizard_mode) return; input_event(sensors, EV_MSC, MSC_TIMESTAMP, steam->sensor_timestamp_us); diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index 6c3e758bbb09..3b81468a1df2 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -171,7 +171,7 @@ static void thrustmaster_interrupts(struct hid_device *hdev) b_ep = ep->desc.bEndpointAddress; /* Are the expected endpoints present? */ - u8 ep_addr[1] = {b_ep}; + u8 ep_addr[2] = {b_ep, 0}; if (!usb_check_int_endpoints(usbif, ep_addr)) { hid_err(hdev, "Unexpected non-int endpoint\n"); diff --git a/drivers/hid/hid-topre.c b/drivers/hid/hid-topre.c index 848361f6225d..ccedf8721722 100644 --- a/drivers/hid/hid-topre.c +++ b/drivers/hid/hid-topre.c @@ -29,6 +29,11 @@ static const __u8 *topre_report_fixup(struct hid_device *hdev, __u8 *rdesc, hid_info(hdev, "fixing up Topre REALFORCE keyboard report descriptor\n"); rdesc[72] = 0x02; + } else if (*rsize >= 106 && rdesc[28] == 0x29 && rdesc[29] == 0xe7 && + rdesc[30] == 0x81 && rdesc[31] == 0x00) { + hid_info(hdev, + "fixing up Topre REALFORCE keyboard report descriptor\n"); + rdesc[31] = 0x02; } return rdesc; } @@ -38,6 +43,8 @@ static const struct hid_device_id topre_id_table[] = { USB_DEVICE_ID_TOPRE_REALFORCE_R2_108) }, { HID_USB_DEVICE(USB_VENDOR_ID_TOPRE, USB_DEVICE_ID_TOPRE_REALFORCE_R2_87) }, + { HID_USB_DEVICE(USB_VENDOR_ID_TOPRE, + USB_DEVICE_ID_TOPRE_REALFORCE_R3S_87) }, { } }; MODULE_DEVICE_TABLE(hid, topre_id_table); diff --git a/drivers/hid/hid-winwing.c b/drivers/hid/hid-winwing.c index 831b760c66ea..d4afbbd27807 100644 --- a/drivers/hid/hid-winwing.c +++ b/drivers/hid/hid-winwing.c @@ -106,6 +106,8 @@ static int winwing_init_led(struct hid_device *hdev, "%s::%s", dev_name(&input->dev), info->led_name); + if (!led->cdev.name) + return -ENOMEM; ret = devm_led_classdev_register(&hdev->dev, &led->cdev); if (ret) diff --git a/drivers/hid/i2c-hid/Kconfig b/drivers/hid/i2c-hid/Kconfig index ef7c595c9403..e8d51f410cc1 100644 --- a/drivers/hid/i2c-hid/Kconfig +++ b/drivers/hid/i2c-hid/Kconfig @@ -2,7 +2,7 @@ menuconfig I2C_HID tristate "I2C HID support" default y - depends on I2C && INPUT && HID + depends on I2C if I2C_HID diff --git a/drivers/hid/intel-ish-hid/Kconfig b/drivers/hid/intel-ish-hid/Kconfig index 253dc10d35ef..568c8688784e 100644 --- a/drivers/hid/intel-ish-hid/Kconfig +++ b/drivers/hid/intel-ish-hid/Kconfig @@ -6,7 +6,6 @@ config INTEL_ISH_HID tristate "Intel Integrated Sensor Hub" default n depends on X86 - depends on HID help The Integrated Sensor Hub (ISH) enables the ability to offload sensor polling and algorithm processing to a dedicated low power diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h index cdd80c653918..07e90d51f073 100644 --- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h +++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h @@ -36,6 +36,8 @@ #define PCI_DEVICE_ID_INTEL_ISH_ARL_H 0x7745 #define PCI_DEVICE_ID_INTEL_ISH_ARL_S 0x7F78 #define PCI_DEVICE_ID_INTEL_ISH_LNL_M 0xA845 +#define PCI_DEVICE_ID_INTEL_ISH_PTL_H 0xE345 +#define PCI_DEVICE_ID_INTEL_ISH_PTL_P 0xE445 #define REVISION_ID_CHT_A0 0x6 #define REVISION_ID_CHT_Ax_SI 0x0 diff --git a/drivers/hid/intel-ish-hid/ipc/ipc.c b/drivers/hid/intel-ish-hid/ipc/ipc.c index 3cd53fc80634..4c861119e97a 100644 --- a/drivers/hid/intel-ish-hid/ipc/ipc.c +++ b/drivers/hid/intel-ish-hid/ipc/ipc.c @@ -517,6 +517,10 @@ static int ish_fw_reset_handler(struct ishtp_device *dev) /* ISH FW is dead */ if (!ish_is_input_ready(dev)) return -EPIPE; + + /* Send clock sync at once after reset */ + ishtp_dev->prev_sync = 0; + /* * Set HOST2ISH.ILUP. Apparently we need this BEFORE sending * RESET_NOTIFY_ACK - FW will be checking for it @@ -577,15 +581,14 @@ static void fw_reset_work_fn(struct work_struct *work) */ static void _ish_sync_fw_clock(struct ishtp_device *dev) { - static unsigned long prev_sync; - uint64_t usec; + struct ipc_time_update_msg time = {}; - if (prev_sync && time_before(jiffies, prev_sync + 20 * HZ)) + if (dev->prev_sync && time_before(jiffies, dev->prev_sync + 20 * HZ)) return; - prev_sync = jiffies; - usec = ktime_to_us(ktime_get_boottime()); - ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &usec, sizeof(uint64_t)); + dev->prev_sync = jiffies; + /* The fields of time would be updated while sending message */ + ipc_send_mng_msg(dev, MNG_SYNC_FW_CLOCK, &time, sizeof(time)); } /** diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 9e2401291a2f..ff0fc8010072 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -26,9 +26,11 @@ enum ishtp_driver_data_index { ISHTP_DRIVER_DATA_NONE, ISHTP_DRIVER_DATA_LNL_M, + ISHTP_DRIVER_DATA_PTL, }; #define ISH_FW_GEN_LNL_M "lnlm" +#define ISH_FW_GEN_PTL "ptl" #define ISH_FIRMWARE_PATH(gen) "intel/ish/ish_" gen ".bin" #define ISH_FIRMWARE_PATH_ALL "intel/ish/ish_*.bin" @@ -37,6 +39,9 @@ static struct ishtp_driver_data ishtp_driver_data[] = { [ISHTP_DRIVER_DATA_LNL_M] = { .fw_generation = ISH_FW_GEN_LNL_M, }, + [ISHTP_DRIVER_DATA_PTL] = { + .fw_generation = ISH_FW_GEN_PTL, + }, }; static const struct pci_device_id ish_pci_tbl[] = { @@ -63,6 +68,8 @@ static const struct pci_device_id ish_pci_tbl[] = { {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_H)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_ARL_S)}, {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_LNL_M), .driver_data = ISHTP_DRIVER_DATA_LNL_M}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_H), .driver_data = ISHTP_DRIVER_DATA_PTL}, + {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ISH_PTL_P), .driver_data = ISHTP_DRIVER_DATA_PTL}, {} }; MODULE_DEVICE_TABLE(pci, ish_pci_tbl); diff --git a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h index 44eddc411e97..ec9f6e87aaf2 100644 --- a/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h +++ b/drivers/hid/intel-ish-hid/ishtp/ishtp-dev.h @@ -253,6 +253,8 @@ struct ishtp_device { unsigned int ipc_tx_cnt; unsigned long long ipc_tx_bytes_cnt; + /* Time of the last clock sync */ + unsigned long prev_sync; const struct ishtp_hw_ops *ops; size_t mtu; uint32_t ishtp_msg_hdr; diff --git a/drivers/hid/intel-thc-hid/Kconfig b/drivers/hid/intel-thc-hid/Kconfig index 91ec84902db8..0351d1137607 100644 --- a/drivers/hid/intel-thc-hid/Kconfig +++ b/drivers/hid/intel-thc-hid/Kconfig @@ -7,7 +7,6 @@ menu "Intel THC HID Support" config INTEL_THC_HID tristate "Intel Touch Host Controller" depends on ACPI - select HID help THC (Touch Host Controller) is the name of the IP block in PCH that interfaces with Touch Devices (ex: touchscreen, touchpad etc.). It diff --git a/drivers/hid/surface-hid/Kconfig b/drivers/hid/surface-hid/Kconfig index 7ce9b5d641eb..d0cfd0d29926 100644 --- a/drivers/hid/surface-hid/Kconfig +++ b/drivers/hid/surface-hid/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ menu "Surface System Aggregator Module HID support" depends on SURFACE_AGGREGATOR - depends on INPUT config SURFACE_HID tristate "HID transport driver for Surface System Aggregator Module" @@ -39,4 +38,3 @@ endmenu config SURFACE_HID_CORE tristate - select HID diff --git a/drivers/hid/usbhid/Kconfig b/drivers/hid/usbhid/Kconfig index 7c2032f7f44d..f3194767a45e 100644 --- a/drivers/hid/usbhid/Kconfig +++ b/drivers/hid/usbhid/Kconfig @@ -5,8 +5,7 @@ menu "USB HID support" config USB_HID tristate "USB HID transport layer" default y - depends on USB && INPUT - select HID + depends on HID help Say Y here if you want to connect USB keyboards, mice, joysticks, graphic tablets, or any other HID based devices diff --git a/drivers/i2c/busses/i2c-amd-asf-plat.c b/drivers/i2c/busses/i2c-amd-asf-plat.c index 7512614bf4b7..93ebec162c6d 100644 --- a/drivers/i2c/busses/i2c-amd-asf-plat.c +++ b/drivers/i2c/busses/i2c-amd-asf-plat.c @@ -293,6 +293,7 @@ static irqreturn_t amd_asf_irq_handler(int irq, void *ptr) amd_asf_update_ioport_target(piix4_smba, ASF_SLV_INTR, SMBHSTSTS, true); } + iowrite32(irq, dev->eoi_base); return IRQ_HANDLED; } diff --git a/drivers/i2c/busses/i2c-ls2x.c b/drivers/i2c/busses/i2c-ls2x.c index 8821cac3897b..b475dd27b7af 100644 --- a/drivers/i2c/busses/i2c-ls2x.c +++ b/drivers/i2c/busses/i2c-ls2x.c @@ -10,6 +10,7 @@ * Rewritten for mainline by Binbin Zhou <zhoubinbin@loongson.cn> */ +#include <linux/bitfield.h> #include <linux/bits.h> #include <linux/completion.h> #include <linux/device.h> @@ -26,7 +27,8 @@ #include <linux/units.h> /* I2C Registers */ -#define I2C_LS2X_PRER 0x0 /* Freq Division Register(16 bits) */ +#define I2C_LS2X_PRER_LO 0x0 /* Freq Division Low Byte Register */ +#define I2C_LS2X_PRER_HI 0x1 /* Freq Division High Byte Register */ #define I2C_LS2X_CTR 0x2 /* Control Register */ #define I2C_LS2X_TXR 0x3 /* Transport Data Register */ #define I2C_LS2X_RXR 0x3 /* Receive Data Register */ @@ -93,6 +95,7 @@ static irqreturn_t ls2x_i2c_isr(int this_irq, void *dev_id) */ static void ls2x_i2c_adjust_bus_speed(struct ls2x_i2c_priv *priv) { + u16 val; struct i2c_timings *t = &priv->i2c_t; struct device *dev = priv->adapter.dev.parent; u32 acpi_speed = i2c_acpi_find_bus_speed(dev); @@ -104,9 +107,14 @@ static void ls2x_i2c_adjust_bus_speed(struct ls2x_i2c_priv *priv) else t->bus_freq_hz = LS2X_I2C_FREQ_STD; - /* Calculate and set i2c frequency. */ - writew(LS2X_I2C_PCLK_FREQ / (5 * t->bus_freq_hz) - 1, - priv->base + I2C_LS2X_PRER); + /* + * According to the chip manual, we can only access the registers as bytes, + * otherwise the high bits will be truncated. + * So set the I2C frequency with a sequential writeb() instead of writew(). + */ + val = LS2X_I2C_PCLK_FREQ / (5 * t->bus_freq_hz) - 1; + writeb(FIELD_GET(GENMASK(7, 0), val), priv->base + I2C_LS2X_PRER_LO); + writeb(FIELD_GET(GENMASK(15, 8), val), priv->base + I2C_LS2X_PRER_HI); } static void ls2x_i2c_init(struct ls2x_i2c_priv *priv) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 3ca08b8ef8af..de713b5747fe 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2554,6 +2554,13 @@ static int npcm_i2c_probe_bus(struct platform_device *pdev) if (irq < 0) return irq; + /* + * Disable the interrupt to avoid the interrupt handler being triggered + * incorrectly by the asynchronous interrupt status since the machine + * might do a warm reset during the last smbus/i2c transfer session. + */ + npcm_i2c_int_enable(bus, false); + ret = devm_request_irq(bus->dev, irq, npcm_i2c_bus_irq, 0, dev_name(bus->dev), bus); if (ret) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 5546184df05f..7ad1ad5c8c3f 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1300,12 +1300,14 @@ new_device_store(struct device *dev, struct device_attribute *attr, info.flags |= I2C_CLIENT_SLAVE; } - info.flags |= I2C_CLIENT_USER; - client = i2c_new_client_device(adap, &info); if (IS_ERR(client)) return PTR_ERR(client); + /* Keep track of the added device */ + mutex_lock(&adap->userspace_clients_lock); + list_add_tail(&client->detected, &adap->userspace_clients); + mutex_unlock(&adap->userspace_clients_lock); dev_info(dev, "%s: Instantiated device %s at 0x%02hx\n", "new_device", info.type, info.addr); @@ -1313,15 +1315,6 @@ new_device_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_WO(new_device); -static int __i2c_find_user_addr(struct device *dev, const void *addrp) -{ - struct i2c_client *client = i2c_verify_client(dev); - unsigned short addr = *(unsigned short *)addrp; - - return client && client->flags & I2C_CLIENT_USER && - i2c_encode_flags_to_addr(client) == addr; -} - /* * And of course let the users delete the devices they instantiated, if * they got it wrong. This interface can only be used to delete devices @@ -1336,7 +1329,7 @@ delete_device_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct i2c_adapter *adap = to_i2c_adapter(dev); - struct device *child_dev; + struct i2c_client *client, *next; unsigned short addr; char end; int res; @@ -1352,19 +1345,28 @@ delete_device_store(struct device *dev, struct device_attribute *attr, return -EINVAL; } - mutex_lock(&core_lock); /* Make sure the device was added through sysfs */ - child_dev = device_find_child(&adap->dev, &addr, __i2c_find_user_addr); - if (child_dev) { - i2c_unregister_device(i2c_verify_client(child_dev)); - put_device(child_dev); - } else { - dev_err(dev, "Can't find userspace-created device at %#x\n", addr); - count = -ENOENT; + res = -ENOENT; + mutex_lock_nested(&adap->userspace_clients_lock, + i2c_adapter_depth(adap)); + list_for_each_entry_safe(client, next, &adap->userspace_clients, + detected) { + if (i2c_encode_flags_to_addr(client) == addr) { + dev_info(dev, "%s: Deleting device %s at 0x%02hx\n", + "delete_device", client->name, client->addr); + + list_del(&client->detected); + i2c_unregister_device(client); + res = count; + break; + } } - mutex_unlock(&core_lock); + mutex_unlock(&adap->userspace_clients_lock); - return count; + if (res < 0) + dev_err(dev, "%s: Can't find device in list\n", + "delete_device"); + return res; } static DEVICE_ATTR_IGNORE_LOCKDEP(delete_device, S_IWUSR, NULL, delete_device_store); @@ -1535,6 +1537,8 @@ static int i2c_register_adapter(struct i2c_adapter *adap) adap->locked_flags = 0; rt_mutex_init(&adap->bus_lock); rt_mutex_init(&adap->mux_lock); + mutex_init(&adap->userspace_clients_lock); + INIT_LIST_HEAD(&adap->userspace_clients); /* Set default timeout to 1 second if not already set */ if (adap->timeout == 0) @@ -1700,6 +1704,23 @@ int i2c_add_numbered_adapter(struct i2c_adapter *adap) } EXPORT_SYMBOL_GPL(i2c_add_numbered_adapter); +static void i2c_do_del_adapter(struct i2c_driver *driver, + struct i2c_adapter *adapter) +{ + struct i2c_client *client, *_n; + + /* Remove the devices we created ourselves as the result of hardware + * probing (using a driver's detect method) */ + list_for_each_entry_safe(client, _n, &driver->clients, detected) { + if (client->adapter == adapter) { + dev_dbg(&adapter->dev, "Removing %s at 0x%x\n", + client->name, client->addr); + list_del(&client->detected); + i2c_unregister_device(client); + } + } +} + static int __unregister_client(struct device *dev, void *dummy) { struct i2c_client *client = i2c_verify_client(dev); @@ -1715,6 +1736,12 @@ static int __unregister_dummy(struct device *dev, void *dummy) return 0; } +static int __process_removed_adapter(struct device_driver *d, void *data) +{ + i2c_do_del_adapter(to_i2c_driver(d), data); + return 0; +} + /** * i2c_del_adapter - unregister I2C adapter * @adap: the adapter being unregistered @@ -1726,6 +1753,7 @@ static int __unregister_dummy(struct device *dev, void *dummy) void i2c_del_adapter(struct i2c_adapter *adap) { struct i2c_adapter *found; + struct i2c_client *client, *next; /* First make sure that this adapter was ever added */ mutex_lock(&core_lock); @@ -1737,16 +1765,31 @@ void i2c_del_adapter(struct i2c_adapter *adap) } i2c_acpi_remove_space_handler(adap); + /* Tell drivers about this removal */ + mutex_lock(&core_lock); + bus_for_each_drv(&i2c_bus_type, NULL, adap, + __process_removed_adapter); + mutex_unlock(&core_lock); + + /* Remove devices instantiated from sysfs */ + mutex_lock_nested(&adap->userspace_clients_lock, + i2c_adapter_depth(adap)); + list_for_each_entry_safe(client, next, &adap->userspace_clients, + detected) { + dev_dbg(&adap->dev, "Removing %s at 0x%x\n", client->name, + client->addr); + list_del(&client->detected); + i2c_unregister_device(client); + } + mutex_unlock(&adap->userspace_clients_lock); /* Detach any active clients. This can't fail, thus we do not * check the returned value. This is a two-pass process, because * we can't remove the dummy devices during the first pass: they * could have been instantiated by real devices wishing to clean * them up properly, so we give them a chance to do that first. */ - mutex_lock(&core_lock); device_for_each_child(&adap->dev, NULL, __unregister_client); device_for_each_child(&adap->dev, NULL, __unregister_dummy); - mutex_unlock(&core_lock); /* device name is gone after device_unregister */ dev_dbg(&adap->dev, "adapter [%s] unregistered\n", adap->name); @@ -1966,6 +2009,7 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver) /* add the driver to the list of i2c drivers in the driver core */ driver->driver.owner = owner; driver->driver.bus = &i2c_bus_type; + INIT_LIST_HEAD(&driver->clients); /* When registration returns, the driver core * will have called probe() for all matching-but-unbound devices. @@ -1983,13 +2027,10 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver) } EXPORT_SYMBOL(i2c_register_driver); -static int __i2c_unregister_detected_client(struct device *dev, void *argp) +static int __process_removed_driver(struct device *dev, void *data) { - struct i2c_client *client = i2c_verify_client(dev); - - if (client && client->flags & I2C_CLIENT_AUTO) - i2c_unregister_device(client); - + if (dev->type == &i2c_adapter_type) + i2c_do_del_adapter(data, to_i2c_adapter(dev)); return 0; } @@ -2000,12 +2041,7 @@ static int __i2c_unregister_detected_client(struct device *dev, void *argp) */ void i2c_del_driver(struct i2c_driver *driver) { - mutex_lock(&core_lock); - /* Satisfy __must_check, function can't fail */ - if (driver_for_each_device(&driver->driver, NULL, NULL, - __i2c_unregister_detected_client)) { - } - mutex_unlock(&core_lock); + i2c_for_each_dev(driver, __process_removed_driver); driver_unregister(&driver->driver); pr_debug("driver [%s] unregistered\n", driver->driver.name); @@ -2432,7 +2468,6 @@ static int i2c_detect_address(struct i2c_client *temp_client, /* Finally call the custom detection function */ memset(&info, 0, sizeof(struct i2c_board_info)); info.addr = addr; - info.flags = I2C_CLIENT_AUTO; err = driver->detect(temp_client, &info); if (err) { /* -ENODEV is returned if the detection fails. We catch it @@ -2459,7 +2494,9 @@ static int i2c_detect_address(struct i2c_client *temp_client, dev_dbg(&adapter->dev, "Creating %s at 0x%02x\n", info.type, info.addr); client = i2c_new_client_device(adapter, &info); - if (IS_ERR(client)) + if (!IS_ERR(client)) + list_add_tail(&client->detected, &driver->clients); + else dev_err(&adapter->dev, "Failed creating %s at 0x%02x\n", info.type, info.addr); } @@ -2469,7 +2506,7 @@ static int i2c_detect_address(struct i2c_client *temp_client, static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver) { const unsigned short *address_list; - struct i2c_client temp_client; + struct i2c_client *temp_client; int i, err = 0; address_list = driver->address_list; @@ -2490,19 +2527,24 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver) return 0; /* Set up a temporary client to help detect callback */ - memset(&temp_client, 0, sizeof(temp_client)); - temp_client.adapter = adapter; + temp_client = kzalloc(sizeof(*temp_client), GFP_KERNEL); + if (!temp_client) + return -ENOMEM; + + temp_client->adapter = adapter; for (i = 0; address_list[i] != I2C_CLIENT_END; i += 1) { dev_dbg(&adapter->dev, "found normal entry for adapter %d, addr 0x%02x\n", i2c_adapter_id(adapter), address_list[i]); - temp_client.addr = address_list[i]; - err = i2c_detect_address(&temp_client, driver); + temp_client->addr = address_list[i]; + err = i2c_detect_address(temp_client, driver); if (unlikely(err)) break; } + kfree(temp_client); + return err; } diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 118fe1d37c22..0fdb1d1316c4 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -56,6 +56,7 @@ #include <asm/intel-family.h> #include <asm/mwait.h> #include <asm/spec-ctrl.h> +#include <asm/tsc.h> #include <asm/fpu/api.h> #define INTEL_IDLE_VERSION "0.5.1" @@ -1799,6 +1800,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) if (intel_idle_state_needs_timer_stop(state)) state->flags |= CPUIDLE_FLAG_TIMER_STOP; + if (cx->type > ACPI_STATE_C1 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halts in idle"); + state->enter = intel_idle; state->enter_s2idle = intel_idle_s2idle; } diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index b91a85a491d0..3721446c6ba4 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -187,7 +187,6 @@ struct bnxt_re_dev { #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 struct net_device *netdev; struct auxiliary_device *adev; - struct notifier_block nb; unsigned int version, major, minor; struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 3ac47f4e6122..f039aefcaf67 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -348,8 +348,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, goto done; } bnxt_re_copy_err_stats(rdev, stats, err_s); - if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags) && - !rdev->is_virtfn) { + if (bnxt_ext_stats_supported(rdev->chip_ctx, rdev->dev_attr->dev_cap_flags, + rdev->is_virtfn)) { rc = bnxt_re_get_ext_stat(rdev, stats); if (rc) { clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 2de101d6e825..6f5db32082dd 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1870,6 +1870,8 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; + srq->qplib_srq.sg_info.pgsize = PAGE_SIZE; + srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT; nq = &rdev->nqr->nq[0]; if (udata) { diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index e9e4da4dd576..a94c8c5387d9 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -396,11 +396,16 @@ free_dcb: static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl) { - struct bnxt_re_dev *rdev = (struct bnxt_re_dev *)handle; + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); struct bnxt_re_dcb_work *dcb_work; + struct bnxt_re_dev *rdev; u32 data1, data2; u16 event_id; + rdev = en_info->rdev; + if (!rdev) + return; + event_id = le16_to_cpu(cmpl->event_id); data1 = le32_to_cpu(cmpl->event_data1); data2 = le32_to_cpu(cmpl->event_data2); @@ -433,6 +438,8 @@ static void bnxt_re_stop_irq(void *handle, bool reset) int indx; rdev = en_info->rdev; + if (!rdev) + return; rcfw = &rdev->rcfw; if (reset) { @@ -461,6 +468,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) int indx, rc; rdev = en_info->rdev; + if (!rdev) + return; msix_ent = rdev->nqr->msix_entries; rcfw = &rdev->rcfw; if (!ent) { @@ -1350,7 +1359,6 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev, return NULL; } /* Default values */ - rdev->nb.notifier_call = NULL; rdev->netdev = en_dev->net; rdev->en_dev = en_dev; rdev->adev = adev; @@ -2345,15 +2353,6 @@ exit: static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type, struct auxiliary_device *aux_dev) { - if (rdev->nb.notifier_call) { - unregister_netdevice_notifier(&rdev->nb); - rdev->nb.notifier_call = NULL; - } else { - /* If notifier is null, we should have already done a - * clean up before coming here. - */ - return; - } bnxt_re_setup_cc(rdev, false); ib_unregister_device(&rdev->ibdev); bnxt_re_dev_uninit(rdev, op_type); @@ -2433,6 +2432,7 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx", __func__, en_dev->en_state); bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev); + bnxt_re_update_en_info_rdev(NULL, en_info, adev); mutex_unlock(&bnxt_re_mutex); return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index be5d907a036b..711990232de1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -547,6 +547,14 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags) CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; } +static inline int bnxt_ext_stats_supported(struct bnxt_qplib_chip_ctx *ctx, + u16 flags, bool virtfn) +{ + /* ext stats supported if cap flag is set AND is a PF OR a Thor2 VF */ + return (_is_ext_stats_supported(flags) && + ((virtfn && bnxt_qplib_is_chip_gen_p7(ctx)) || (!virtfn))); +} + static inline bool _is_hw_retx_supported(u16 dev_cap_flags) { return dev_cap_flags & diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index dded339802b3..160e8927d364 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1286,10 +1286,8 @@ static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout) return tx_timeout; } -static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) +static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); u32 timeout = 0; do { @@ -1299,8 +1297,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) } while (++timeout < tx_timeout); } -static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, - struct hns_roce_cmq_desc *desc, int num) +static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, + int num, u32 tx_timeout) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; @@ -1309,8 +1308,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, int ret; int i; - spin_lock_bh(&csq->lock); - tail = csq->head; for (i = 0; i < num; i++) { @@ -1324,22 +1321,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]); - hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode)); + hns_roce_wait_csq_done(hr_dev, tx_timeout); if (hns_roce_cmq_csq_done(hr_dev)) { ret = 0; for (i = 0; i < num; i++) { /* check the result of hardware write back */ - desc[i] = csq->desc[tail++]; + desc_ret = le16_to_cpu(csq->desc[tail++].retval); if (tail == csq->desc_num) tail = 0; - - desc_ret = le16_to_cpu(desc[i].retval); if (likely(desc_ret == CMD_EXEC_SUCCESS)) continue; - dev_err_ratelimited(hr_dev->dev, - "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", - desc->opcode, desc_ret); ret = hns_roce_cmd_err_convert_errno(desc_ret); } } else { @@ -1354,14 +1346,54 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, ret = -EAGAIN; } - spin_unlock_bh(&csq->lock); - if (ret) atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]); return ret; } +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; + u16 opcode = le16_to_cpu(desc->opcode); + u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); + u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT; + u32 rsv_tail; + int ret; + int i; + + while (try_cnt) { + try_cnt--; + + spin_lock_bh(&csq->lock); + rsv_tail = csq->head; + ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout); + if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME && + try_cnt) { + spin_unlock_bh(&csq->lock); + mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC); + continue; + } + + for (i = 0; i < num; i++) { + desc[i] = csq->desc[rsv_tail++]; + if (rsv_tail == csq->desc_num) + rsv_tail = 0; + } + spin_unlock_bh(&csq->lock); + break; + } + + if (ret) + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = 0x%x, return = %d.\n", + opcode, ret); + + return ret; +} + static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index cbdbc9edbce6..91a5665465ff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -230,6 +230,8 @@ enum hns_roce_opcode_type { }; #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000 +#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8 +#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5 struct hns_roce_cmdq_tx_timeout_map { u16 opcode; u32 tx_timeout; diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 67c2d43135a8..457cea6d9909 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -174,7 +174,7 @@ static int mana_gd_allocate_doorbell_page(struct gdma_context *gc, req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; req.num_resources = 1; - req.alignment = 1; + req.alignment = PAGE_SIZE / MANA_PAGE_SIZE; /* Have GDMA start searching from 0 */ req.allocated_resources = 0; diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 505bc47fd575..99036afb3aef 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -67,7 +67,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.tclass = grh->traffic_class; } - ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); + ah->av.stat_rate_sl = + (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4); if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { if (init_attr->xmit_slave) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 4f6c1968a2ee..81cfa74147a1 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -546,6 +546,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, struct ib_qp *qp) { struct mlx5_ib_dev *dev = to_mdev(qp->device); + bool new = false; int err; if (!counter->id) { @@ -560,6 +561,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, return err; counter->id = MLX5_GET(alloc_q_counter_out, out, counter_set_id); + new = true; } err = mlx5_ib_qp_set_counter(qp, counter); @@ -569,8 +571,10 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, return 0; fail_set_counter: - mlx5_ib_counter_dealloc(counter); - counter->id = 0; + if (new) { + mlx5_ib_counter_dealloc(counter); + counter->id = 0; + } return err; } diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bb02b6adbf2c..753faa9ad06a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1550,7 +1550,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); - if (!umem_dmabuf->sgt) + if (!umem_dmabuf->sgt || !mr) return; mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); @@ -1935,7 +1935,8 @@ err: static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr) { - if (!mr->umem && !mr->data_direct && mr->descs) { + if (!mr->umem && !mr->data_direct && + mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; struct mlx5_ib_dev *dev = to_mdev(device); @@ -2022,11 +2023,16 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; bool is_odp = is_odp_mr(mr); + bool is_odp_dma_buf = is_dmabuf_mr(mr) && + !to_ib_umem_dmabuf(mr->umem)->pinned; int ret = 0; if (is_odp) mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + if (is_odp_dma_buf) + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL); + if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { ent = mr->mmkey.cache_ent; /* upon storing to a clean temp entry - schedule its cleanup */ @@ -2054,6 +2060,12 @@ out: mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); } + if (is_odp_dma_buf) { + if (!ret) + to_ib_umem_dmabuf(mr->umem)->private = NULL; + dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + } + return ret; } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index f1e23583e6c0..e77c9280c07e 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -242,6 +242,7 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) != mr) { xa_unlock(&imr->implicit_children); + mlx5r_deref_odp_mkey(&imr->mmkey); return; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a43eba9d3572..88724d15705d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3447,11 +3447,11 @@ static int ib_to_mlx5_rate_map(u8 rate) return 0; } -static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) +int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate) { u32 stat_rate_support; - if (rate == IB_RATE_PORT_CURRENT) + if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS) return 0; if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS) @@ -3596,7 +3596,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, sizeof(grh->dgid.raw)); } - err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah)); + err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah)); if (err < 0) return err; MLX5_SET(ads, path, stat_rate, err); @@ -4579,6 +4579,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1); MLX5_SET(dctc, dctc, counter_set_id, set_id); + + qp->port = attr->port_num; } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { struct mlx5_ib_modify_qp_resp resp = {}; u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {}; @@ -5074,7 +5076,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp, } if (qp_attr_mask & IB_QP_PORT) - qp_attr->port_num = MLX5_GET(dctc, dctc, port); + qp_attr->port_num = mqp->port; if (qp_attr_mask & IB_QP_MIN_RNR_TIMER) qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak); if (qp_attr_mask & IB_QP_AV) { diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index b6ee7c3ee1ca..2530e7730635 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -56,4 +56,5 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); int mlx5_ib_qp_event_init(void); void mlx5_ib_qp_event_cleanup(void); +int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate); #endif /* _MLX5_IB_QP_H */ diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 887fd6fa3ba9..793f3c5c4d01 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -231,30 +231,6 @@ void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev) ib_dealloc_pd(dev->umrc.pd); } -static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) -{ - struct umr_common *umrc = &dev->umrc; - struct ib_qp_attr attr; - int err; - - attr.qp_state = IB_QPS_RESET; - err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); - if (err) { - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); - goto err; - } - - err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); - if (err) - goto err; - - umrc->state = MLX5_UMR_STATE_ACTIVE; - return 0; - -err: - umrc->state = MLX5_UMR_STATE_ERR; - return err; -} static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, struct mlx5r_umr_wqe *wqe, bool with_data) @@ -302,6 +278,61 @@ out: return err; } +static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey, + struct mlx5r_umr_context *umr_context, + struct mlx5r_umr_wqe *wqe, bool with_data) +{ + struct umr_common *umrc = &dev->umrc; + struct ib_qp_attr attr; + int err; + + mutex_lock(&umrc->lock); + /* Preventing any further WRs to be sent now */ + if (umrc->state != MLX5_UMR_STATE_RECOVER) { + mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n", + umrc->state); + umrc->state = MLX5_UMR_STATE_RECOVER; + } + mutex_unlock(&umrc->lock); + + /* Sending a final/barrier WR (the failed one) and wait for its completion. + * This will ensure that all the previous WRs got a completion before + * we set the QP state to RESET. + */ + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe, + with_data); + if (err) { + mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err); + goto err; + } + + /* Since the QP is in an error state, it will only receive + * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier + * we don't care about its status. + */ + wait_for_completion(&umr_context->done); + + attr.qp_state = IB_QPS_RESET; + err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); + if (err) { + mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err); + goto err; + } + + err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); + if (err) { + mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err); + goto err; + } + + umrc->state = MLX5_UMR_STATE_ACTIVE; + return 0; + +err: + umrc->state = MLX5_UMR_STATE_ERR; + return err; +} + static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = @@ -366,9 +397,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, mlx5_ib_warn(dev, "reg umr failed (%u). Trying to recover and resubmit the flushed WQEs, mkey = %u\n", umr_context.status, mkey); - mutex_lock(&umrc->lock); - err = mlx5r_umr_recover(dev); - mutex_unlock(&umrc->lock); + err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data); if (err) mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", err); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 0bbda60d3cdc..23caea22f8dc 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -175,6 +175,7 @@ #define CONTROL_GAM_EN 25 #define CONTROL_GALOG_EN 28 #define CONTROL_GAINT_EN 29 +#define CONTROL_EPH_EN 45 #define CONTROL_XT_EN 50 #define CONTROL_INTCAPXT_EN 51 #define CONTROL_IRTCACHEDIS 59 diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index c5cd92edada0..cb536d372b12 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2653,6 +2653,10 @@ static void iommu_init_flags(struct amd_iommu *iommu) /* Set IOTLB invalidation timeout to 1s */ iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S); + + /* Enable Enhanced Peripheral Page Request Handling */ + if (check_feature(FEATURE_EPHSUP)) + iommu_feature_enable(iommu, CONTROL_EPH_EN); } static void iommu_apply_resume_quirks(struct amd_iommu *iommu) @@ -3194,7 +3198,7 @@ out: return true; } -static void iommu_snp_enable(void) +static __init void iommu_snp_enable(void) { #ifdef CONFIG_KVM_AMD_SEV if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) @@ -3219,6 +3223,14 @@ static void iommu_snp_enable(void) goto disable_snp; } + /* + * Enable host SNP support once SNP support is checked on IOMMU. + */ + if (snp_rmptable_init()) { + pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); + goto disable_snp; + } + pr_info("IOMMU SNP support enabled.\n"); return; @@ -3318,6 +3330,19 @@ static int __init iommu_go_to_state(enum iommu_init_state state) ret = state_next(); } + /* + * SNP platform initilazation requires IOMMUs to be fully configured. + * If the SNP support on IOMMUs has NOT been checked, simply mark SNP + * as unsupported. If the SNP support on IOMMUs has been checked and + * host SNP support enabled but RMP enforcement has not been enabled + * in IOMMUs, then the system is in a half-baked state, but can limp + * along as all memory should be Hypervisor-Owned in the RMP. WARN, + * but leave SNP as "supported" to avoid confusing the kernel. + */ + if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) && + !WARN_ON_ONCE(amd_iommu_snp_en)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + return ret; } @@ -3426,18 +3451,23 @@ void __init amd_iommu_detect(void) int ret; if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return; + goto disable_snp; if (!amd_iommu_sme_check()) - return; + goto disable_snp; ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); if (ret) - return; + goto disable_snp; amd_iommu_detected = true; iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; + return; + +disable_snp: + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); } /**************************************************************************** diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b48a72bd7b23..cd5116d8c3b2 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2043,12 +2043,12 @@ static void set_dte_entry(struct amd_iommu *iommu, make_clear_dte(dev_data, dte, &new); if (domain->iop.mode != PAGE_MODE_NONE) - new.data[0] = iommu_virt_to_phys(domain->iop.root); + new.data[0] |= iommu_virt_to_phys(domain->iop.root); new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; - new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; + new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW; /* * When SNP is enabled, we can only support TV=1 with non-zero domain ID. diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index c666ecab955d..69e23e017d9e 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -249,7 +249,7 @@ struct exynos_iommu_domain { struct list_head clients; /* list of sysmmu_drvdata.domain_node */ sysmmu_pte_t *pgtable; /* lv1 page table, 16KB */ short *lv2entcnt; /* free lv2 entry counter for each section */ - spinlock_t lock; /* lock for modyfying list of clients */ + spinlock_t lock; /* lock for modifying list of clients */ spinlock_t pgtablelock; /* lock for modifying page table @ pgtable */ struct iommu_domain domain; /* generic domain data structure */ }; @@ -292,7 +292,7 @@ struct sysmmu_drvdata { struct clk *aclk; /* SYSMMU's aclk clock */ struct clk *pclk; /* SYSMMU's pclk clock */ struct clk *clk_master; /* master's device clock */ - spinlock_t lock; /* lock for modyfying state */ + spinlock_t lock; /* lock for modifying state */ bool active; /* current status */ struct exynos_iommu_domain *domain; /* domain we belong to */ struct list_head domain_node; /* node for domain clients list */ @@ -746,7 +746,7 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) ret = devm_request_irq(dev, irq, exynos_sysmmu_irq, 0, dev_name(dev), data); if (ret) { - dev_err(dev, "Unabled to register handler of irq %d\n", irq); + dev_err(dev, "Unable to register handler of irq %d\n", irq); return ret; } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 9f424acf474e..e540092d664d 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -2043,6 +2043,7 @@ int enable_drhd_fault_handling(unsigned int cpu) /* * Enable fault control interrupt. */ + guard(rwsem_read)(&dmar_global_lock); for_each_iommu(iommu, drhd) { u32 fault_status; int ret; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index cc46098f875b..bf1f0c814348 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3146,7 +3146,14 @@ int __init intel_iommu_init(void) iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, "%s", iommu->name); + /* + * The iommu device probe is protected by the iommu_probe_device_lock. + * Release the dmar_global_lock before entering the device probe path + * to avoid unnecessary lock order splat. + */ + up_read(&dmar_global_lock); iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); + down_read(&dmar_global_lock); iommu_pmu_register(iommu); } @@ -4378,9 +4385,6 @@ static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void * { struct device *dev = data; - if (dev != &pdev->dev) - return 0; - return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); } diff --git a/drivers/iommu/intel/prq.c b/drivers/iommu/intel/prq.c index c2d792db52c3..064194399b38 100644 --- a/drivers/iommu/intel/prq.c +++ b/drivers/iommu/intel/prq.c @@ -87,7 +87,9 @@ prq_retry: struct page_req_dsc *req; req = &iommu->prq[head / sizeof(*req)]; - if (!req->pasid_present || req->pasid != pasid) { + if (req->rid != sid || + (req->pasid_present && pasid != req->pasid) || + (!req->pasid_present && pasid != IOMMU_NO_PASID)) { head = (head + sizeof(*req)) & PRQ_RING_MASK; continue; } diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 4674e618797c..8b5926c1452e 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -478,6 +478,7 @@ void iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev) ops->page_response(dev, iopf, &resp); list_del_init(&group->pending_node); + iopf_free_group(group); } mutex_unlock(&fault_param->lock); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 870c3cdbd0f6..60aed01e54f2 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1756,7 +1756,7 @@ static int iommu_get_def_domain_type(struct iommu_group *group, group->id); /* - * Try to recover, drivers are allowed to force IDENITY or DMA, IDENTITY + * Try to recover, drivers are allowed to force IDENTITY or DMA, IDENTITY * takes precedence. */ if (type == IOMMU_DOMAIN_IDENTITY) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index be063bfb50c4..c11b9965c4ad 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -169,6 +169,7 @@ config IXP4XX_IRQ config LAN966X_OIC tristate "Microchip LAN966x OIC Support" + depends on MCHP_LAN966X_PCI || COMPILE_TEST select GENERIC_IRQ_CHIP select IRQ_DOMAIN help diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index da5250f0155c..2b1684c60e3c 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -577,7 +577,8 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs) AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT)); } - if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) { + if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) == + (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) { int irq; if (cpumask_test_cpu(smp_processor_id(), &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff)) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 76dce0aac246..270d7a4d85a6 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -44,6 +44,7 @@ static u8 dist_prio_nmi __ro_after_init = GICV3_PRIO_NMI; #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) #define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1) #define FLAGS_WORKAROUND_ASR_ERRATUM_8601001 (1ULL << 2) +#define FLAGS_WORKAROUND_INSECURE (1ULL << 3) #define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) @@ -83,6 +84,8 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); #define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U) #define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer) +static bool nmi_support_forbidden; + /* * There are 16 SGIs, though we only actually use 8 in Linux. The other 8 SGIs * are potentially stolen by the secure side. Some code, especially code dealing @@ -163,21 +166,27 @@ static void __init gic_prio_init(void) { bool ds; - ds = gic_dist_security_disabled(); - if (!ds) { - u32 val; - - val = readl_relaxed(gic_data.dist_base + GICD_CTLR); - val |= GICD_CTLR_DS; - writel_relaxed(val, gic_data.dist_base + GICD_CTLR); + cpus_have_group0 = gic_has_group0(); - ds = gic_dist_security_disabled(); - if (ds) - pr_warn("Broken GIC integration, security disabled"); + ds = gic_dist_security_disabled(); + if ((gic_data.flags & FLAGS_WORKAROUND_INSECURE) && !ds) { + if (cpus_have_group0) { + u32 val; + + val = readl_relaxed(gic_data.dist_base + GICD_CTLR); + val |= GICD_CTLR_DS; + writel_relaxed(val, gic_data.dist_base + GICD_CTLR); + + ds = gic_dist_security_disabled(); + if (ds) + pr_warn("Broken GIC integration, security disabled\n"); + } else { + pr_warn("Broken GIC integration, pNMI forbidden\n"); + nmi_support_forbidden = true; + } } cpus_have_security_disabled = ds; - cpus_have_group0 = gic_has_group0(); /* * How priority values are used by the GIC depends on two things: @@ -209,7 +218,7 @@ static void __init gic_prio_init(void) * be in the non-secure range, we program the non-secure values into * the distributor to match the PMR values we want. */ - if (cpus_have_group0 & !cpus_have_security_disabled) { + if (cpus_have_group0 && !cpus_have_security_disabled) { dist_prio_irq = __gicv3_prio_to_ns(dist_prio_irq); dist_prio_nmi = __gicv3_prio_to_ns(dist_prio_nmi); } @@ -1922,6 +1931,18 @@ static bool gic_enable_quirk_arm64_2941627(void *data) return true; } +static bool gic_enable_quirk_rk3399(void *data) +{ + struct gic_chip_data *d = data; + + if (of_machine_is_compatible("rockchip,rk3399")) { + d->flags |= FLAGS_WORKAROUND_INSECURE; + return true; + } + + return false; +} + static bool rd_set_non_coherent(void *data) { struct gic_chip_data *d = data; @@ -1997,6 +2018,12 @@ static const struct gic_quirk gic_quirks[] = { .init = rd_set_non_coherent, }, { + .desc = "GICv3: Insecure RK3399 integration", + .iidr = 0x0000043b, + .mask = 0xff000fff, + .init = gic_enable_quirk_rk3399, + }, + { } }; @@ -2004,7 +2031,7 @@ static void gic_enable_nmi_support(void) { int i; - if (!gic_prio_masking_enabled()) + if (!gic_prio_masking_enabled() || nmi_support_forbidden) return; rdist_nmi_refs = kcalloc(gic_data.ppi_nr + SGI_NR, diff --git a/drivers/irqchip/irq-jcore-aic.c b/drivers/irqchip/irq-jcore-aic.c index b9dcc8e78c75..1f613eb7b7f0 100644 --- a/drivers/irqchip/irq-jcore-aic.c +++ b/drivers/irqchip/irq-jcore-aic.c @@ -38,7 +38,7 @@ static struct irq_chip jcore_aic; static void handle_jcore_irq(struct irq_desc *desc) { if (irqd_is_per_cpu(irq_desc_get_irq_data(desc))) - handle_percpu_irq(desc); + handle_percpu_devid_irq(desc); else handle_simple_irq(desc); } diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c index b337f6c05f18..4eebed39880a 100644 --- a/drivers/irqchip/irq-mvebu-icu.c +++ b/drivers/irqchip/irq-mvebu-icu.c @@ -68,7 +68,8 @@ static int mvebu_icu_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { unsigned int param_count = static_branch_unlikely(&legacy_bindings) ? 3 : 2; - struct mvebu_icu_msi_data *msi_data = d->host_data; + struct msi_domain_info *info = d->host_data; + struct mvebu_icu_msi_data *msi_data = info->chip_data; struct mvebu_icu *icu = msi_data->icu; /* Check the count of the parameters in dt */ diff --git a/drivers/irqchip/irq-partition-percpu.c b/drivers/irqchip/irq-partition-percpu.c index 8e76d2913e6b..4441ffe149ea 100644 --- a/drivers/irqchip/irq-partition-percpu.c +++ b/drivers/irqchip/irq-partition-percpu.c @@ -98,7 +98,7 @@ static void partition_irq_print_chip(struct irq_data *d, struct seq_file *p) struct irq_chip *chip = irq_desc_get_chip(part->chained_desc); struct irq_data *data = irq_desc_get_irq_data(part->chained_desc); - seq_printf(p, " %5s-%lu", chip->name, data->hwirq); + seq_printf(p, "%5s-%lu", chip->name, data->hwirq); } static struct irq_chip partition_irq_chip = { diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c index c5c2e6929a2f..275df5005705 100644 --- a/drivers/irqchip/irq-riscv-imsic-early.c +++ b/drivers/irqchip/irq-riscv-imsic-early.c @@ -27,7 +27,7 @@ static void imsic_ipi_send(unsigned int cpu) { struct imsic_local_config *local = per_cpu_ptr(imsic->global.local, cpu); - writel_relaxed(IMSIC_IPI_ID, local->msi_va); + writel(IMSIC_IPI_ID, local->msi_va); } static void imsic_ipi_starting_cpu(void) diff --git a/drivers/irqchip/irq-thead-c900-aclint-sswi.c b/drivers/irqchip/irq-thead-c900-aclint-sswi.c index b0e366ade427..8ff6e7a1363b 100644 --- a/drivers/irqchip/irq-thead-c900-aclint-sswi.c +++ b/drivers/irqchip/irq-thead-c900-aclint-sswi.c @@ -31,7 +31,7 @@ static DEFINE_PER_CPU(void __iomem *, sswi_cpu_regs); static void thead_aclint_sswi_ipi_send(unsigned int cpu) { - writel_relaxed(0x1, per_cpu(sswi_cpu_regs, cpu)); + writel(0x1, per_cpu(sswi_cpu_regs, cpu)); } static void thead_aclint_sswi_ipi_clear(void) diff --git a/drivers/irqchip/qcom-pdc.c b/drivers/irqchip/qcom-pdc.c index 74b2f124116e..52d77546aacb 100644 --- a/drivers/irqchip/qcom-pdc.c +++ b/drivers/irqchip/qcom-pdc.c @@ -21,9 +21,11 @@ #include <linux/types.h> #define PDC_MAX_GPIO_IRQS 256 +#define PDC_DRV_OFFSET 0x10000 /* Valid only on HW version < 3.2 */ #define IRQ_ENABLE_BANK 0x10 +#define IRQ_ENABLE_BANK_MAX (IRQ_ENABLE_BANK + BITS_TO_BYTES(PDC_MAX_GPIO_IRQS)) #define IRQ_i_CFG 0x110 /* Valid only on HW version >= 3.2 */ @@ -46,13 +48,20 @@ struct pdc_pin_region { static DEFINE_RAW_SPINLOCK(pdc_lock); static void __iomem *pdc_base; +static void __iomem *pdc_prev_base; static struct pdc_pin_region *pdc_region; static int pdc_region_cnt; static unsigned int pdc_version; +static bool pdc_x1e_quirk; + +static void pdc_base_reg_write(void __iomem *base, int reg, u32 i, u32 val) +{ + writel_relaxed(val, base + reg + i * sizeof(u32)); +} static void pdc_reg_write(int reg, u32 i, u32 val) { - writel_relaxed(val, pdc_base + reg + i * sizeof(u32)); + pdc_base_reg_write(pdc_base, reg, i, val); } static u32 pdc_reg_read(int reg, u32 i) @@ -60,6 +69,34 @@ static u32 pdc_reg_read(int reg, u32 i) return readl_relaxed(pdc_base + reg + i * sizeof(u32)); } +static void pdc_x1e_irq_enable_write(u32 bank, u32 enable) +{ + void __iomem *base; + + /* Remap the write access to work around a hardware bug on X1E */ + switch (bank) { + case 0 ... 1: + /* Use previous DRV (client) region and shift to bank 3-4 */ + base = pdc_prev_base; + bank += 3; + break; + case 2 ... 4: + /* Use our own region and shift to bank 0-2 */ + base = pdc_base; + bank -= 2; + break; + case 5: + /* No fixup required for bank 5 */ + base = pdc_base; + break; + default: + WARN_ON(1); + return; + } + + pdc_base_reg_write(base, IRQ_ENABLE_BANK, bank, enable); +} + static void __pdc_enable_intr(int pin_out, bool on) { unsigned long enable; @@ -72,7 +109,11 @@ static void __pdc_enable_intr(int pin_out, bool on) enable = pdc_reg_read(IRQ_ENABLE_BANK, index); __assign_bit(mask, &enable, on); - pdc_reg_write(IRQ_ENABLE_BANK, index, enable); + + if (pdc_x1e_quirk) + pdc_x1e_irq_enable_write(index, enable); + else + pdc_reg_write(IRQ_ENABLE_BANK, index, enable); } else { enable = pdc_reg_read(IRQ_i_CFG, pin_out); __assign_bit(IRQ_i_CFG_IRQ_ENABLE, &enable, on); @@ -324,10 +365,29 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent) if (res_size > resource_size(&res)) pr_warn("%pOF: invalid reg size, please fix DT\n", node); + /* + * PDC has multiple DRV regions, each one provides the same set of + * registers for a particular client in the system. Due to a hardware + * bug on X1E, some writes to the IRQ_ENABLE_BANK register must be + * issued inside the previous region. This region belongs to + * a different client and is not described in the device tree. Map the + * region with the expected offset to preserve support for old DTs. + */ + if (of_device_is_compatible(node, "qcom,x1e80100-pdc")) { + pdc_prev_base = ioremap(res.start - PDC_DRV_OFFSET, IRQ_ENABLE_BANK_MAX); + if (!pdc_prev_base) { + pr_err("%pOF: unable to map previous PDC DRV region\n", node); + return -ENXIO; + } + + pdc_x1e_quirk = true; + } + pdc_base = ioremap(res.start, res_size); if (!pdc_base) { pr_err("%pOF: unable to map PDC registers\n", node); - return -ENXIO; + ret = -ENXIO; + goto fail; } pdc_version = pdc_reg_read(PDC_VERSION_REG, 0); @@ -363,6 +423,7 @@ static int qcom_pdc_init(struct device_node *node, struct device_node *parent) fail: kfree(pdc_region); iounmap(pdc_base); + iounmap(pdc_prev_base); return ret; } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index ee9f7cecd78e..c45464b6576a 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3790,20 +3790,18 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: { - __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; - - watermark_percentage += ic->journal_entries / 2; - do_div(watermark_percentage, ic->journal_entries); - arg_count = 3; + arg_count = 1; /* buffer_sectors */ arg_count += !!ic->meta_dev; arg_count += ic->sectors_per_block != 1; arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)); arg_count += ic->reset_recalculate_flag; arg_count += ic->discard; - arg_count += ic->mode == 'J'; - arg_count += ic->mode == 'J'; - arg_count += ic->mode == 'B'; - arg_count += ic->mode == 'B'; + arg_count += ic->mode != 'I'; /* interleave_sectors */ + arg_count += ic->mode == 'J'; /* journal_sectors */ + arg_count += ic->mode == 'J'; /* journal_watermark */ + arg_count += ic->mode == 'J'; /* commit_time */ + arg_count += ic->mode == 'B'; /* sectors_per_bit */ + arg_count += ic->mode == 'B'; /* bitmap_flush_interval */ arg_count += !!ic->internal_hash_alg.alg_string; arg_count += !!ic->journal_crypt_alg.alg_string; arg_count += !!ic->journal_mac_alg.alg_string; @@ -3822,10 +3820,15 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type, DMEMIT(" reset_recalculate"); if (ic->discard) DMEMIT(" allow_discards"); - DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); - DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); + if (ic->mode != 'I') + DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors); DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors); if (ic->mode == 'J') { + __u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100; + + watermark_percentage += ic->journal_entries / 2; + do_div(watermark_percentage, ic->journal_entries); + DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS); DMEMIT(" journal_watermark:%u", (unsigned int)watermark_percentage); DMEMIT(" commit_time:%u", ic->autocommit_msec); } diff --git a/drivers/md/dm-vdo/dedupe.c b/drivers/md/dm-vdo/dedupe.c index b6f8e2dc7729..3f3d29af1be4 100644 --- a/drivers/md/dm-vdo/dedupe.c +++ b/drivers/md/dm-vdo/dedupe.c @@ -2178,6 +2178,7 @@ static int initialize_index(struct vdo *vdo, struct hash_zones *zones) vdo_set_dedupe_index_timeout_interval(vdo_dedupe_index_timeout_interval); vdo_set_dedupe_index_min_timer_interval(vdo_dedupe_index_min_timer_interval); + spin_lock_init(&zones->lock); /* * Since we will save up the timeouts that would have been reported but were ratelimited, diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index a382929ce7ba..369aed044b40 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -76,10 +76,8 @@ static int linear_set_limits(struct mddev *mddev) lim.max_write_zeroes_sectors = mddev->chunk_sectors; lim.io_min = mddev->chunk_sectors << 9; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); - if (err) { - queue_limits_cancel_update(mddev->gendisk->queue); + if (err) return err; - } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 8fc9339b00c7..70bcc3cdf2cd 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -386,10 +386,8 @@ static int raid0_set_limits(struct mddev *mddev) lim.io_opt = lim.io_min * mddev->raid_disks; lim.features |= BLK_FEAT_ATOMIC_WRITES; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); - if (err) { - queue_limits_cancel_update(mddev->gendisk->queue); + if (err) return err; - } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 9d57a88dbd26..10ea3af40991 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3219,10 +3219,8 @@ static int raid1_set_limits(struct mddev *mddev) lim.max_write_zeroes_sectors = 0; lim.features |= BLK_FEAT_ATOMIC_WRITES; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); - if (err) { - queue_limits_cancel_update(mddev->gendisk->queue); + if (err) return err; - } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index efe93b979167..15b9ae5bf84d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4020,10 +4020,8 @@ static int raid10_set_queue_limits(struct mddev *mddev) lim.io_opt = lim.io_min * raid10_nr_stripes(conf); lim.features |= BLK_FEAT_ATOMIC_WRITES; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); - if (err) { - queue_limits_cancel_update(mddev->gendisk->queue); + if (err) return err; - } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/mfd/syscon.c b/drivers/mfd/syscon.c index 226915ca3c93..aa4a9940b569 100644 --- a/drivers/mfd/syscon.c +++ b/drivers/mfd/syscon.c @@ -159,6 +159,7 @@ err_regmap: } static struct regmap *device_node_get_regmap(struct device_node *np, + bool create_regmap, bool check_res) { struct syscon *entry, *syscon = NULL; @@ -172,7 +173,7 @@ static struct regmap *device_node_get_regmap(struct device_node *np, } if (!syscon) { - if (of_device_is_compatible(np, "syscon")) + if (create_regmap) syscon = of_syscon_register(np, check_res); else syscon = ERR_PTR(-EINVAL); @@ -233,15 +234,37 @@ err_unlock: } EXPORT_SYMBOL_GPL(of_syscon_register_regmap); +/** + * device_node_to_regmap() - Get or create a regmap for specified device node + * @np: Device tree node + * + * Get a regmap for the specified device node. If there's not an existing + * regmap, then one is instantiated. This function should not be used if the + * device node has a custom regmap driver or has resources (clocks, resets) to + * be managed. Use syscon_node_to_regmap() instead for those cases. + * + * Return: regmap ptr on success, negative error code on failure. + */ struct regmap *device_node_to_regmap(struct device_node *np) { - return device_node_get_regmap(np, false); + return device_node_get_regmap(np, true, false); } EXPORT_SYMBOL_GPL(device_node_to_regmap); +/** + * syscon_node_to_regmap() - Get or create a regmap for specified syscon device node + * @np: Device tree node + * + * Get a regmap for the specified device node. If there's not an existing + * regmap, then one is instantiated if the node is a generic "syscon". This + * function is safe to use for a syscon registered with + * of_syscon_register_regmap(). + * + * Return: regmap ptr on success, negative error code on failure. + */ struct regmap *syscon_node_to_regmap(struct device_node *np) { - return device_node_get_regmap(np, true); + return device_node_get_regmap(np, of_device_is_compatible(np, "syscon"), true); } EXPORT_SYMBOL_GPL(syscon_node_to_regmap); diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index 4b6e91372526..345ea91629e0 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -273,6 +273,7 @@ #define MSDC_PAD_TUNE_CMD2_SEL BIT(21) /* RW */ #define PAD_DS_TUNE_DLY_SEL BIT(0) /* RW */ +#define PAD_DS_TUNE_DLY2_SEL BIT(1) /* RW */ #define PAD_DS_TUNE_DLY1 GENMASK(6, 2) /* RW */ #define PAD_DS_TUNE_DLY2 GENMASK(11, 7) /* RW */ #define PAD_DS_TUNE_DLY3 GENMASK(16, 12) /* RW */ @@ -318,6 +319,7 @@ /* EMMC50_PAD_DS_TUNE mask */ #define PAD_DS_DLY_SEL BIT(16) /* RW */ +#define PAD_DS_DLY2_SEL BIT(15) /* RW */ #define PAD_DS_DLY1 GENMASK(14, 10) /* RW */ #define PAD_DS_DLY3 GENMASK(4, 0) /* RW */ @@ -2504,13 +2506,23 @@ tune_done: static int msdc_prepare_hs400_tuning(struct mmc_host *mmc, struct mmc_ios *ios) { struct msdc_host *host = mmc_priv(mmc); + host->hs400_mode = true; - if (host->top_base) - writel(host->hs400_ds_delay, - host->top_base + EMMC50_PAD_DS_TUNE); - else - writel(host->hs400_ds_delay, host->base + PAD_DS_TUNE); + if (host->top_base) { + if (host->hs400_ds_dly3) + sdr_set_field(host->top_base + EMMC50_PAD_DS_TUNE, + PAD_DS_DLY3, host->hs400_ds_dly3); + if (host->hs400_ds_delay) + writel(host->hs400_ds_delay, + host->top_base + EMMC50_PAD_DS_TUNE); + } else { + if (host->hs400_ds_dly3) + sdr_set_field(host->base + PAD_DS_TUNE, + PAD_DS_TUNE_DLY3, host->hs400_ds_dly3); + if (host->hs400_ds_delay) + writel(host->hs400_ds_delay, host->base + PAD_DS_TUNE); + } /* hs400 mode must set it to 0 */ sdr_clr_bits(host->base + MSDC_PATCH_BIT2, MSDC_PATCH_BIT2_CFGCRCSTS); /* to improve read performance, set outstanding to 2 */ @@ -2530,14 +2542,11 @@ static int msdc_execute_hs400_tuning(struct mmc_host *mmc, struct mmc_card *card if (host->top_base) { sdr_set_bits(host->top_base + EMMC50_PAD_DS_TUNE, PAD_DS_DLY_SEL); - if (host->hs400_ds_dly3) - sdr_set_field(host->top_base + EMMC50_PAD_DS_TUNE, - PAD_DS_DLY3, host->hs400_ds_dly3); + sdr_clr_bits(host->top_base + EMMC50_PAD_DS_TUNE, + PAD_DS_DLY2_SEL); } else { sdr_set_bits(host->base + PAD_DS_TUNE, PAD_DS_TUNE_DLY_SEL); - if (host->hs400_ds_dly3) - sdr_set_field(host->base + PAD_DS_TUNE, - PAD_DS_TUNE_DLY3, host->hs400_ds_dly3); + sdr_clr_bits(host->base + PAD_DS_TUNE, PAD_DS_TUNE_DLY2_SEL); } host->hs400_tuning = true; diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index b73f673db92b..f75c31815ab0 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -155,7 +155,6 @@ struct sdhci_am654_data { u32 tuning_loop; #define SDHCI_AM654_QUIRK_FORCE_CDTEST BIT(0) -#define SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA BIT(1) }; struct window { @@ -357,29 +356,6 @@ static void sdhci_j721e_4bit_set_clock(struct sdhci_host *host, sdhci_set_clock(host, clock); } -static int sdhci_am654_start_signal_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) -{ - struct sdhci_host *host = mmc_priv(mmc); - struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); - struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); - int ret; - - if ((sdhci_am654->quirks & SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA) && - ios->signal_voltage == MMC_SIGNAL_VOLTAGE_180) { - if (!IS_ERR(mmc->supply.vqmmc)) { - ret = mmc_regulator_set_vqmmc(mmc, ios); - if (ret < 0) { - pr_err("%s: Switching to 1.8V signalling voltage failed,\n", - mmc_hostname(mmc)); - return -EIO; - } - } - return 0; - } - - return sdhci_start_signal_voltage_switch(mmc, ios); -} - static u8 sdhci_am654_write_power_on(struct sdhci_host *host, u8 val, int reg) { writeb(val, host->ioaddr + reg); @@ -868,11 +844,6 @@ static int sdhci_am654_get_of_property(struct platform_device *pdev, if (device_property_read_bool(dev, "ti,fails-without-test-cd")) sdhci_am654->quirks |= SDHCI_AM654_QUIRK_FORCE_CDTEST; - /* Suppress v1p8 ena for eMMC and SD with vqmmc supply */ - if (!!of_parse_phandle(dev->of_node, "vmmc-supply", 0) == - !!of_parse_phandle(dev->of_node, "vqmmc-supply", 0)) - sdhci_am654->quirks |= SDHCI_AM654_QUIRK_SUPPRESS_V1P8_ENA; - sdhci_get_of_property(pdev); return 0; @@ -969,7 +940,6 @@ static int sdhci_am654_probe(struct platform_device *pdev) goto err_pltfm_free; } - host->mmc_host_ops.start_signal_voltage_switch = sdhci_am654_start_signal_voltage_switch; host->mmc_host_ops.execute_tuning = sdhci_am654_execute_tuning; pm_runtime_get_noresume(dev); diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c index 8d1d710e439d..6667eea95597 100644 --- a/drivers/mtd/nand/raw/cadence-nand-controller.c +++ b/drivers/mtd/nand/raw/cadence-nand-controller.c @@ -471,6 +471,8 @@ struct cdns_nand_ctrl { struct { void __iomem *virt; dma_addr_t dma; + dma_addr_t iova_dma; + u32 size; } io; int irq; @@ -1835,11 +1837,11 @@ static int cadence_nand_slave_dma_transfer(struct cdns_nand_ctrl *cdns_ctrl, } if (dir == DMA_FROM_DEVICE) { - src_dma = cdns_ctrl->io.dma; + src_dma = cdns_ctrl->io.iova_dma; dst_dma = buf_dma; } else { src_dma = buf_dma; - dst_dma = cdns_ctrl->io.dma; + dst_dma = cdns_ctrl->io.iova_dma; } tx = dmaengine_prep_dma_memcpy(cdns_ctrl->dmac, dst_dma, src_dma, len, @@ -1861,12 +1863,12 @@ static int cadence_nand_slave_dma_transfer(struct cdns_nand_ctrl *cdns_ctrl, dma_async_issue_pending(cdns_ctrl->dmac); wait_for_completion(&finished); - dma_unmap_single(cdns_ctrl->dev, buf_dma, len, dir); + dma_unmap_single(dma_dev->dev, buf_dma, len, dir); return 0; err_unmap: - dma_unmap_single(cdns_ctrl->dev, buf_dma, len, dir); + dma_unmap_single(dma_dev->dev, buf_dma, len, dir); err: dev_dbg(cdns_ctrl->dev, "Fall back to CPU I/O\n"); @@ -2869,6 +2871,7 @@ cadence_nand_irq_cleanup(int irqnum, struct cdns_nand_ctrl *cdns_ctrl) static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) { dma_cap_mask_t mask; + struct dma_device *dma_dev = cdns_ctrl->dmac->device; int ret; cdns_ctrl->cdma_desc = dma_alloc_coherent(cdns_ctrl->dev, @@ -2904,15 +2907,24 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) dma_cap_set(DMA_MEMCPY, mask); if (cdns_ctrl->caps1->has_dma) { - cdns_ctrl->dmac = dma_request_channel(mask, NULL, NULL); - if (!cdns_ctrl->dmac) { - dev_err(cdns_ctrl->dev, - "Unable to get a DMA channel\n"); - ret = -EBUSY; + cdns_ctrl->dmac = dma_request_chan_by_mask(&mask); + if (IS_ERR(cdns_ctrl->dmac)) { + ret = dev_err_probe(cdns_ctrl->dev, PTR_ERR(cdns_ctrl->dmac), + "%d: Failed to get a DMA channel\n", ret); goto disable_irq; } } + cdns_ctrl->io.iova_dma = dma_map_resource(dma_dev->dev, cdns_ctrl->io.dma, + cdns_ctrl->io.size, + DMA_BIDIRECTIONAL, 0); + + ret = dma_mapping_error(dma_dev->dev, cdns_ctrl->io.iova_dma); + if (ret) { + dev_err(cdns_ctrl->dev, "Failed to map I/O resource to DMA\n"); + goto dma_release_chnl; + } + nand_controller_init(&cdns_ctrl->controller); INIT_LIST_HEAD(&cdns_ctrl->chips); @@ -2923,18 +2935,22 @@ static int cadence_nand_init(struct cdns_nand_ctrl *cdns_ctrl) if (ret) { dev_err(cdns_ctrl->dev, "Failed to register MTD: %d\n", ret); - goto dma_release_chnl; + goto unmap_dma_resource; } kfree(cdns_ctrl->buf); cdns_ctrl->buf = kzalloc(cdns_ctrl->buf_size, GFP_KERNEL); if (!cdns_ctrl->buf) { ret = -ENOMEM; - goto dma_release_chnl; + goto unmap_dma_resource; } return 0; +unmap_dma_resource: + dma_unmap_resource(dma_dev->dev, cdns_ctrl->io.iova_dma, + cdns_ctrl->io.size, DMA_BIDIRECTIONAL, 0); + dma_release_chnl: if (cdns_ctrl->dmac) dma_release_channel(cdns_ctrl->dmac); @@ -2956,6 +2972,10 @@ free_buf_desc: static void cadence_nand_remove(struct cdns_nand_ctrl *cdns_ctrl) { cadence_nand_chips_cleanup(cdns_ctrl); + if (cdns_ctrl->dmac) + dma_unmap_resource(cdns_ctrl->dmac->device->dev, + cdns_ctrl->io.iova_dma, cdns_ctrl->io.size, + DMA_BIDIRECTIONAL, 0); cadence_nand_irq_cleanup(cdns_ctrl->irq, cdns_ctrl); kfree(cdns_ctrl->buf); dma_free_coherent(cdns_ctrl->dev, sizeof(struct cadence_nand_cdma_desc), @@ -3020,7 +3040,9 @@ static int cadence_nand_dt_probe(struct platform_device *ofdev) cdns_ctrl->io.virt = devm_platform_get_and_ioremap_resource(ofdev, 1, &res); if (IS_ERR(cdns_ctrl->io.virt)) return PTR_ERR(cdns_ctrl->io.virt); + cdns_ctrl->io.dma = res->start; + cdns_ctrl->io.size = resource_size(res); dt->clk = devm_clk_get(cdns_ctrl->dev, "nf_clk"); if (IS_ERR(dt->clk)) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index d2d2aeee42a7..6720b547892b 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -1881,18 +1881,18 @@ static int qcom_param_page_type_exec(struct nand_chip *chip, const struct nand_ nandc->regs->addr0 = 0; nandc->regs->addr1 = 0; - host->cfg0 = FIELD_PREP(CW_PER_PAGE_MASK, 0) | - FIELD_PREP(UD_SIZE_BYTES_MASK, 512) | - FIELD_PREP(NUM_ADDR_CYCLES_MASK, 5) | - FIELD_PREP(SPARE_SIZE_BYTES_MASK, 0); - - host->cfg1 = FIELD_PREP(NAND_RECOVERY_CYCLES_MASK, 7) | - FIELD_PREP(BAD_BLOCK_BYTE_NUM_MASK, 17) | - FIELD_PREP(CS_ACTIVE_BSY, 0) | - FIELD_PREP(BAD_BLOCK_IN_SPARE_AREA, 1) | - FIELD_PREP(WR_RD_BSY_GAP_MASK, 2) | - FIELD_PREP(WIDE_FLASH, 0) | - FIELD_PREP(DEV0_CFG1_ECC_DISABLE, 1); + nandc->regs->cfg0 = cpu_to_le32(FIELD_PREP(CW_PER_PAGE_MASK, 0) | + FIELD_PREP(UD_SIZE_BYTES_MASK, 512) | + FIELD_PREP(NUM_ADDR_CYCLES_MASK, 5) | + FIELD_PREP(SPARE_SIZE_BYTES_MASK, 0)); + + nandc->regs->cfg1 = cpu_to_le32(FIELD_PREP(NAND_RECOVERY_CYCLES_MASK, 7) | + FIELD_PREP(BAD_BLOCK_BYTE_NUM_MASK, 17) | + FIELD_PREP(CS_ACTIVE_BSY, 0) | + FIELD_PREP(BAD_BLOCK_IN_SPARE_AREA, 1) | + FIELD_PREP(WR_RD_BSY_GAP_MASK, 2) | + FIELD_PREP(WIDE_FLASH, 0) | + FIELD_PREP(DEV0_CFG1_ECC_DISABLE, 1)); if (!nandc->props->qpic_version2) nandc->regs->ecc_buf_cfg = cpu_to_le32(ECC_CFG_ECC_DISABLE); diff --git a/drivers/mtd/spi-nor/sst.c b/drivers/mtd/spi-nor/sst.c index b5ad7118c49a..175211fe6a5e 100644 --- a/drivers/mtd/spi-nor/sst.c +++ b/drivers/mtd/spi-nor/sst.c @@ -174,7 +174,7 @@ static int sst_nor_write_data(struct spi_nor *nor, loff_t to, size_t len, int ret; nor->program_opcode = op; - ret = spi_nor_write_data(nor, to, 1, buf); + ret = spi_nor_write_data(nor, to, len, buf); if (ret < 0) return ret; WARN(ret != len, "While writing %zu byte written %i bytes\n", len, ret); diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index 6cba9717a6d8..399844809bbe 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ -385,15 +385,16 @@ static int c_can_plat_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "registering %s failed (err=%d)\n", KBUILD_MODNAME, ret); - goto exit_free_device; + goto exit_pm_runtime; } dev_info(&pdev->dev, "%s device registered (regs=%p, irq=%d)\n", KBUILD_MODNAME, priv->base, dev->irq); return 0; -exit_free_device: +exit_pm_runtime: pm_runtime_disable(priv->device); +exit_free_device: free_c_can_dev(dev); exit: dev_err(&pdev->dev, "probe failed\n"); diff --git a/drivers/net/can/ctucanfd/ctucanfd_base.c b/drivers/net/can/ctucanfd/ctucanfd_base.c index 64c349fd4600..f65c1a1e05cc 100644 --- a/drivers/net/can/ctucanfd/ctucanfd_base.c +++ b/drivers/net/can/ctucanfd/ctucanfd_base.c @@ -867,10 +867,12 @@ static void ctucan_err_interrupt(struct net_device *ndev, u32 isr) } break; case CAN_STATE_ERROR_ACTIVE: - cf->can_id |= CAN_ERR_CNT; - cf->data[1] = CAN_ERR_CRTL_ACTIVE; - cf->data[6] = bec.txerr; - cf->data[7] = bec.rxerr; + if (skb) { + cf->can_id |= CAN_ERR_CNT; + cf->data[1] = CAN_ERR_CRTL_ACTIVE; + cf->data[6] = bec.txerr; + cf->data[7] = bec.rxerr; + } break; default: netdev_warn(ndev, "unhandled error state (%d:%s)!\n", diff --git a/drivers/net/can/rockchip/rockchip_canfd-core.c b/drivers/net/can/rockchip/rockchip_canfd-core.c index df18c85fc078..d9a937ba126c 100644 --- a/drivers/net/can/rockchip/rockchip_canfd-core.c +++ b/drivers/net/can/rockchip/rockchip_canfd-core.c @@ -622,7 +622,7 @@ rkcanfd_handle_rx_fifo_overflow_int(struct rkcanfd_priv *priv) netdev_dbg(priv->ndev, "RX-FIFO overflow\n"); skb = rkcanfd_alloc_can_err_skb(priv, &cf, ×tamp); - if (skb) + if (!skb) return 0; rkcanfd_get_berr_counter_corrected(priv, &bec); diff --git a/drivers/net/can/usb/etas_es58x/es58x_devlink.c b/drivers/net/can/usb/etas_es58x/es58x_devlink.c index eee20839d96f..0d155eb1b9e9 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_devlink.c +++ b/drivers/net/can/usb/etas_es58x/es58x_devlink.c @@ -248,7 +248,11 @@ static int es58x_devlink_info_get(struct devlink *devlink, return ret; } - return devlink_info_serial_number_put(req, es58x_dev->udev->serial); + if (es58x_dev->udev->serial) + ret = devlink_info_serial_number_put(req, + es58x_dev->udev->serial); + + return ret; } const struct devlink_ops es58x_dl_ops = { diff --git a/drivers/net/dsa/realtek/Kconfig b/drivers/net/dsa/realtek/Kconfig index 6989972eebc3..10687722d14c 100644 --- a/drivers/net/dsa/realtek/Kconfig +++ b/drivers/net/dsa/realtek/Kconfig @@ -43,4 +43,10 @@ config NET_DSA_REALTEK_RTL8366RB help Select to enable support for Realtek RTL8366RB. +config NET_DSA_REALTEK_RTL8366RB_LEDS + bool "Support RTL8366RB LED control" + depends on (LEDS_CLASS=y || LEDS_CLASS=NET_DSA_REALTEK_RTL8366RB) + depends on NET_DSA_REALTEK_RTL8366RB + default NET_DSA_REALTEK_RTL8366RB + endif diff --git a/drivers/net/dsa/realtek/Makefile b/drivers/net/dsa/realtek/Makefile index 35491dc20d6d..17367bcba496 100644 --- a/drivers/net/dsa/realtek/Makefile +++ b/drivers/net/dsa/realtek/Makefile @@ -12,4 +12,7 @@ endif obj-$(CONFIG_NET_DSA_REALTEK_RTL8366RB) += rtl8366.o rtl8366-objs := rtl8366-core.o rtl8366rb.o +ifdef CONFIG_NET_DSA_REALTEK_RTL8366RB_LEDS +rtl8366-objs += rtl8366rb-leds.o +endif obj-$(CONFIG_NET_DSA_REALTEK_RTL8365MB) += rtl8365mb.o diff --git a/drivers/net/dsa/realtek/rtl8366rb-leds.c b/drivers/net/dsa/realtek/rtl8366rb-leds.c new file mode 100644 index 000000000000..99c890681ae6 --- /dev/null +++ b/drivers/net/dsa/realtek/rtl8366rb-leds.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bitops.h> +#include <linux/regmap.h> +#include <net/dsa.h> +#include "rtl83xx.h" +#include "rtl8366rb.h" + +static inline u32 rtl8366rb_led_group_port_mask(u8 led_group, u8 port) +{ + switch (led_group) { + case 0: + return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + case 1: + return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + case 2: + return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + case 3: + return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); + default: + return 0; + } +} + +static int rb8366rb_get_port_led(struct rtl8366rb_led *led) +{ + struct realtek_priv *priv = led->priv; + u8 led_group = led->led_group; + u8 port_num = led->port_num; + int ret; + u32 val; + + ret = regmap_read(priv->map, RTL8366RB_LED_X_X_CTRL_REG(led_group), + &val); + if (ret) { + dev_err(priv->dev, "error reading LED on port %d group %d\n", + led_group, port_num); + return ret; + } + + return !!(val & rtl8366rb_led_group_port_mask(led_group, port_num)); +} + +static int rb8366rb_set_port_led(struct rtl8366rb_led *led, bool enable) +{ + struct realtek_priv *priv = led->priv; + u8 led_group = led->led_group; + u8 port_num = led->port_num; + int ret; + + ret = regmap_update_bits(priv->map, + RTL8366RB_LED_X_X_CTRL_REG(led_group), + rtl8366rb_led_group_port_mask(led_group, + port_num), + enable ? 0xffff : 0); + if (ret) { + dev_err(priv->dev, "error updating LED on port %d group %d\n", + led_group, port_num); + return ret; + } + + /* Change the LED group to manual controlled LEDs if required */ + ret = rb8366rb_set_ledgroup_mode(priv, led_group, + RTL8366RB_LEDGROUP_FORCE); + + if (ret) { + dev_err(priv->dev, "error updating LED GROUP group %d\n", + led_group); + return ret; + } + + return 0; +} + +static int +rtl8366rb_cled_brightness_set_blocking(struct led_classdev *ldev, + enum led_brightness brightness) +{ + struct rtl8366rb_led *led = container_of(ldev, struct rtl8366rb_led, + cdev); + + return rb8366rb_set_port_led(led, brightness == LED_ON); +} + +static int rtl8366rb_setup_led(struct realtek_priv *priv, struct dsa_port *dp, + struct fwnode_handle *led_fwnode) +{ + struct rtl8366rb *rb = priv->chip_data; + struct led_init_data init_data = { }; + enum led_default_state state; + struct rtl8366rb_led *led; + u32 led_group; + int ret; + + ret = fwnode_property_read_u32(led_fwnode, "reg", &led_group); + if (ret) + return ret; + + if (led_group >= RTL8366RB_NUM_LEDGROUPS) { + dev_warn(priv->dev, "Invalid LED reg %d defined for port %d", + led_group, dp->index); + return -EINVAL; + } + + led = &rb->leds[dp->index][led_group]; + led->port_num = dp->index; + led->led_group = led_group; + led->priv = priv; + + state = led_init_default_state_get(led_fwnode); + switch (state) { + case LEDS_DEFSTATE_ON: + led->cdev.brightness = 1; + rb8366rb_set_port_led(led, 1); + break; + case LEDS_DEFSTATE_KEEP: + led->cdev.brightness = + rb8366rb_get_port_led(led); + break; + case LEDS_DEFSTATE_OFF: + default: + led->cdev.brightness = 0; + rb8366rb_set_port_led(led, 0); + } + + led->cdev.max_brightness = 1; + led->cdev.brightness_set_blocking = + rtl8366rb_cled_brightness_set_blocking; + init_data.fwnode = led_fwnode; + init_data.devname_mandatory = true; + + init_data.devicename = kasprintf(GFP_KERNEL, "Realtek-%d:0%d:%d", + dp->ds->index, dp->index, led_group); + if (!init_data.devicename) + return -ENOMEM; + + ret = devm_led_classdev_register_ext(priv->dev, &led->cdev, &init_data); + if (ret) { + dev_warn(priv->dev, "Failed to init LED %d for port %d", + led_group, dp->index); + return ret; + } + + return 0; +} + +int rtl8366rb_setup_leds(struct realtek_priv *priv) +{ + struct dsa_switch *ds = &priv->ds; + struct device_node *leds_np; + struct dsa_port *dp; + int ret = 0; + + dsa_switch_for_each_port(dp, ds) { + if (!dp->dn) + continue; + + leds_np = of_get_child_by_name(dp->dn, "leds"); + if (!leds_np) { + dev_dbg(priv->dev, "No leds defined for port %d", + dp->index); + continue; + } + + for_each_child_of_node_scoped(leds_np, led_np) { + ret = rtl8366rb_setup_led(priv, dp, + of_fwnode_handle(led_np)); + if (ret) + break; + } + + of_node_put(leds_np); + if (ret) + return ret; + } + return 0; +} diff --git a/drivers/net/dsa/realtek/rtl8366rb.c b/drivers/net/dsa/realtek/rtl8366rb.c index 4c4a95d4380c..f54771cab56d 100644 --- a/drivers/net/dsa/realtek/rtl8366rb.c +++ b/drivers/net/dsa/realtek/rtl8366rb.c @@ -27,11 +27,7 @@ #include "realtek-smi.h" #include "realtek-mdio.h" #include "rtl83xx.h" - -#define RTL8366RB_PORT_NUM_CPU 5 -#define RTL8366RB_NUM_PORTS 6 -#define RTL8366RB_PHY_NO_MAX 4 -#define RTL8366RB_PHY_ADDR_MAX 31 +#include "rtl8366rb.h" /* Switch Global Configuration register */ #define RTL8366RB_SGCR 0x0000 @@ -176,39 +172,6 @@ */ #define RTL8366RB_VLAN_INGRESS_CTRL2_REG 0x037f -/* LED control registers */ -/* The LED blink rate is global; it is used by all triggers in all groups. */ -#define RTL8366RB_LED_BLINKRATE_REG 0x0430 -#define RTL8366RB_LED_BLINKRATE_MASK 0x0007 -#define RTL8366RB_LED_BLINKRATE_28MS 0x0000 -#define RTL8366RB_LED_BLINKRATE_56MS 0x0001 -#define RTL8366RB_LED_BLINKRATE_84MS 0x0002 -#define RTL8366RB_LED_BLINKRATE_111MS 0x0003 -#define RTL8366RB_LED_BLINKRATE_222MS 0x0004 -#define RTL8366RB_LED_BLINKRATE_446MS 0x0005 - -/* LED trigger event for each group */ -#define RTL8366RB_LED_CTRL_REG 0x0431 -#define RTL8366RB_LED_CTRL_OFFSET(led_group) \ - (4 * (led_group)) -#define RTL8366RB_LED_CTRL_MASK(led_group) \ - (0xf << RTL8366RB_LED_CTRL_OFFSET(led_group)) - -/* The RTL8366RB_LED_X_X registers are used to manually set the LED state only - * when the corresponding LED group in RTL8366RB_LED_CTRL_REG is - * RTL8366RB_LEDGROUP_FORCE. Otherwise, it is ignored. - */ -#define RTL8366RB_LED_0_1_CTRL_REG 0x0432 -#define RTL8366RB_LED_2_3_CTRL_REG 0x0433 -#define RTL8366RB_LED_X_X_CTRL_REG(led_group) \ - ((led_group) <= 1 ? \ - RTL8366RB_LED_0_1_CTRL_REG : \ - RTL8366RB_LED_2_3_CTRL_REG) -#define RTL8366RB_LED_0_X_CTRL_MASK GENMASK(5, 0) -#define RTL8366RB_LED_X_1_CTRL_MASK GENMASK(11, 6) -#define RTL8366RB_LED_2_X_CTRL_MASK GENMASK(5, 0) -#define RTL8366RB_LED_X_3_CTRL_MASK GENMASK(11, 6) - #define RTL8366RB_MIB_COUNT 33 #define RTL8366RB_GLOBAL_MIB_COUNT 1 #define RTL8366RB_MIB_COUNTER_PORT_OFFSET 0x0050 @@ -244,7 +207,6 @@ #define RTL8366RB_PORT_STATUS_AN_MASK 0x0080 #define RTL8366RB_NUM_VLANS 16 -#define RTL8366RB_NUM_LEDGROUPS 4 #define RTL8366RB_NUM_VIDS 4096 #define RTL8366RB_PRIORITYMAX 7 #define RTL8366RB_NUM_FIDS 8 @@ -351,46 +313,6 @@ #define RTL8366RB_GREEN_FEATURE_TX BIT(0) #define RTL8366RB_GREEN_FEATURE_RX BIT(2) -enum rtl8366_ledgroup_mode { - RTL8366RB_LEDGROUP_OFF = 0x0, - RTL8366RB_LEDGROUP_DUP_COL = 0x1, - RTL8366RB_LEDGROUP_LINK_ACT = 0x2, - RTL8366RB_LEDGROUP_SPD1000 = 0x3, - RTL8366RB_LEDGROUP_SPD100 = 0x4, - RTL8366RB_LEDGROUP_SPD10 = 0x5, - RTL8366RB_LEDGROUP_SPD1000_ACT = 0x6, - RTL8366RB_LEDGROUP_SPD100_ACT = 0x7, - RTL8366RB_LEDGROUP_SPD10_ACT = 0x8, - RTL8366RB_LEDGROUP_SPD100_10_ACT = 0x9, - RTL8366RB_LEDGROUP_FIBER = 0xa, - RTL8366RB_LEDGROUP_AN_FAULT = 0xb, - RTL8366RB_LEDGROUP_LINK_RX = 0xc, - RTL8366RB_LEDGROUP_LINK_TX = 0xd, - RTL8366RB_LEDGROUP_MASTER = 0xe, - RTL8366RB_LEDGROUP_FORCE = 0xf, - - __RTL8366RB_LEDGROUP_MODE_MAX -}; - -struct rtl8366rb_led { - u8 port_num; - u8 led_group; - struct realtek_priv *priv; - struct led_classdev cdev; -}; - -/** - * struct rtl8366rb - RTL8366RB-specific data - * @max_mtu: per-port max MTU setting - * @pvid_enabled: if PVID is set for respective port - * @leds: per-port and per-ledgroup led info - */ -struct rtl8366rb { - unsigned int max_mtu[RTL8366RB_NUM_PORTS]; - bool pvid_enabled[RTL8366RB_NUM_PORTS]; - struct rtl8366rb_led leds[RTL8366RB_NUM_PORTS][RTL8366RB_NUM_LEDGROUPS]; -}; - static struct rtl8366_mib_counter rtl8366rb_mib_counters[] = { { 0, 0, 4, "IfInOctets" }, { 0, 4, 4, "EtherStatsOctets" }, @@ -831,9 +753,10 @@ static int rtl8366rb_jam_table(const struct rtl8366rb_jam_tbl_entry *jam_table, return 0; } -static int rb8366rb_set_ledgroup_mode(struct realtek_priv *priv, - u8 led_group, - enum rtl8366_ledgroup_mode mode) +/* This code is used also with LEDs disabled */ +int rb8366rb_set_ledgroup_mode(struct realtek_priv *priv, + u8 led_group, + enum rtl8366_ledgroup_mode mode) { int ret; u32 val; @@ -850,144 +773,7 @@ static int rb8366rb_set_ledgroup_mode(struct realtek_priv *priv, return 0; } -static inline u32 rtl8366rb_led_group_port_mask(u8 led_group, u8 port) -{ - switch (led_group) { - case 0: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); - case 1: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); - case 2: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); - case 3: - return FIELD_PREP(RTL8366RB_LED_0_X_CTRL_MASK, BIT(port)); - default: - return 0; - } -} - -static int rb8366rb_get_port_led(struct rtl8366rb_led *led) -{ - struct realtek_priv *priv = led->priv; - u8 led_group = led->led_group; - u8 port_num = led->port_num; - int ret; - u32 val; - - ret = regmap_read(priv->map, RTL8366RB_LED_X_X_CTRL_REG(led_group), - &val); - if (ret) { - dev_err(priv->dev, "error reading LED on port %d group %d\n", - led_group, port_num); - return ret; - } - - return !!(val & rtl8366rb_led_group_port_mask(led_group, port_num)); -} - -static int rb8366rb_set_port_led(struct rtl8366rb_led *led, bool enable) -{ - struct realtek_priv *priv = led->priv; - u8 led_group = led->led_group; - u8 port_num = led->port_num; - int ret; - - ret = regmap_update_bits(priv->map, - RTL8366RB_LED_X_X_CTRL_REG(led_group), - rtl8366rb_led_group_port_mask(led_group, - port_num), - enable ? 0xffff : 0); - if (ret) { - dev_err(priv->dev, "error updating LED on port %d group %d\n", - led_group, port_num); - return ret; - } - - /* Change the LED group to manual controlled LEDs if required */ - ret = rb8366rb_set_ledgroup_mode(priv, led_group, - RTL8366RB_LEDGROUP_FORCE); - - if (ret) { - dev_err(priv->dev, "error updating LED GROUP group %d\n", - led_group); - return ret; - } - - return 0; -} - -static int -rtl8366rb_cled_brightness_set_blocking(struct led_classdev *ldev, - enum led_brightness brightness) -{ - struct rtl8366rb_led *led = container_of(ldev, struct rtl8366rb_led, - cdev); - - return rb8366rb_set_port_led(led, brightness == LED_ON); -} - -static int rtl8366rb_setup_led(struct realtek_priv *priv, struct dsa_port *dp, - struct fwnode_handle *led_fwnode) -{ - struct rtl8366rb *rb = priv->chip_data; - struct led_init_data init_data = { }; - enum led_default_state state; - struct rtl8366rb_led *led; - u32 led_group; - int ret; - - ret = fwnode_property_read_u32(led_fwnode, "reg", &led_group); - if (ret) - return ret; - - if (led_group >= RTL8366RB_NUM_LEDGROUPS) { - dev_warn(priv->dev, "Invalid LED reg %d defined for port %d", - led_group, dp->index); - return -EINVAL; - } - - led = &rb->leds[dp->index][led_group]; - led->port_num = dp->index; - led->led_group = led_group; - led->priv = priv; - - state = led_init_default_state_get(led_fwnode); - switch (state) { - case LEDS_DEFSTATE_ON: - led->cdev.brightness = 1; - rb8366rb_set_port_led(led, 1); - break; - case LEDS_DEFSTATE_KEEP: - led->cdev.brightness = - rb8366rb_get_port_led(led); - break; - case LEDS_DEFSTATE_OFF: - default: - led->cdev.brightness = 0; - rb8366rb_set_port_led(led, 0); - } - - led->cdev.max_brightness = 1; - led->cdev.brightness_set_blocking = - rtl8366rb_cled_brightness_set_blocking; - init_data.fwnode = led_fwnode; - init_data.devname_mandatory = true; - - init_data.devicename = kasprintf(GFP_KERNEL, "Realtek-%d:0%d:%d", - dp->ds->index, dp->index, led_group); - if (!init_data.devicename) - return -ENOMEM; - - ret = devm_led_classdev_register_ext(priv->dev, &led->cdev, &init_data); - if (ret) { - dev_warn(priv->dev, "Failed to init LED %d for port %d", - led_group, dp->index); - return ret; - } - - return 0; -} - +/* This code is used also with LEDs disabled */ static int rtl8366rb_setup_all_leds_off(struct realtek_priv *priv) { int ret = 0; @@ -1008,38 +794,6 @@ static int rtl8366rb_setup_all_leds_off(struct realtek_priv *priv) return ret; } -static int rtl8366rb_setup_leds(struct realtek_priv *priv) -{ - struct dsa_switch *ds = &priv->ds; - struct device_node *leds_np; - struct dsa_port *dp; - int ret = 0; - - dsa_switch_for_each_port(dp, ds) { - if (!dp->dn) - continue; - - leds_np = of_get_child_by_name(dp->dn, "leds"); - if (!leds_np) { - dev_dbg(priv->dev, "No leds defined for port %d", - dp->index); - continue; - } - - for_each_child_of_node_scoped(leds_np, led_np) { - ret = rtl8366rb_setup_led(priv, dp, - of_fwnode_handle(led_np)); - if (ret) - break; - } - - of_node_put(leds_np); - if (ret) - return ret; - } - return 0; -} - static int rtl8366rb_setup(struct dsa_switch *ds) { struct realtek_priv *priv = ds->priv; diff --git a/drivers/net/dsa/realtek/rtl8366rb.h b/drivers/net/dsa/realtek/rtl8366rb.h new file mode 100644 index 000000000000..685ff3275faa --- /dev/null +++ b/drivers/net/dsa/realtek/rtl8366rb.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _RTL8366RB_H +#define _RTL8366RB_H + +#include "realtek.h" + +#define RTL8366RB_PORT_NUM_CPU 5 +#define RTL8366RB_NUM_PORTS 6 +#define RTL8366RB_PHY_NO_MAX 4 +#define RTL8366RB_NUM_LEDGROUPS 4 +#define RTL8366RB_PHY_ADDR_MAX 31 + +/* LED control registers */ +/* The LED blink rate is global; it is used by all triggers in all groups. */ +#define RTL8366RB_LED_BLINKRATE_REG 0x0430 +#define RTL8366RB_LED_BLINKRATE_MASK 0x0007 +#define RTL8366RB_LED_BLINKRATE_28MS 0x0000 +#define RTL8366RB_LED_BLINKRATE_56MS 0x0001 +#define RTL8366RB_LED_BLINKRATE_84MS 0x0002 +#define RTL8366RB_LED_BLINKRATE_111MS 0x0003 +#define RTL8366RB_LED_BLINKRATE_222MS 0x0004 +#define RTL8366RB_LED_BLINKRATE_446MS 0x0005 + +/* LED trigger event for each group */ +#define RTL8366RB_LED_CTRL_REG 0x0431 +#define RTL8366RB_LED_CTRL_OFFSET(led_group) \ + (4 * (led_group)) +#define RTL8366RB_LED_CTRL_MASK(led_group) \ + (0xf << RTL8366RB_LED_CTRL_OFFSET(led_group)) + +/* The RTL8366RB_LED_X_X registers are used to manually set the LED state only + * when the corresponding LED group in RTL8366RB_LED_CTRL_REG is + * RTL8366RB_LEDGROUP_FORCE. Otherwise, it is ignored. + */ +#define RTL8366RB_LED_0_1_CTRL_REG 0x0432 +#define RTL8366RB_LED_2_3_CTRL_REG 0x0433 +#define RTL8366RB_LED_X_X_CTRL_REG(led_group) \ + ((led_group) <= 1 ? \ + RTL8366RB_LED_0_1_CTRL_REG : \ + RTL8366RB_LED_2_3_CTRL_REG) +#define RTL8366RB_LED_0_X_CTRL_MASK GENMASK(5, 0) +#define RTL8366RB_LED_X_1_CTRL_MASK GENMASK(11, 6) +#define RTL8366RB_LED_2_X_CTRL_MASK GENMASK(5, 0) +#define RTL8366RB_LED_X_3_CTRL_MASK GENMASK(11, 6) + +enum rtl8366_ledgroup_mode { + RTL8366RB_LEDGROUP_OFF = 0x0, + RTL8366RB_LEDGROUP_DUP_COL = 0x1, + RTL8366RB_LEDGROUP_LINK_ACT = 0x2, + RTL8366RB_LEDGROUP_SPD1000 = 0x3, + RTL8366RB_LEDGROUP_SPD100 = 0x4, + RTL8366RB_LEDGROUP_SPD10 = 0x5, + RTL8366RB_LEDGROUP_SPD1000_ACT = 0x6, + RTL8366RB_LEDGROUP_SPD100_ACT = 0x7, + RTL8366RB_LEDGROUP_SPD10_ACT = 0x8, + RTL8366RB_LEDGROUP_SPD100_10_ACT = 0x9, + RTL8366RB_LEDGROUP_FIBER = 0xa, + RTL8366RB_LEDGROUP_AN_FAULT = 0xb, + RTL8366RB_LEDGROUP_LINK_RX = 0xc, + RTL8366RB_LEDGROUP_LINK_TX = 0xd, + RTL8366RB_LEDGROUP_MASTER = 0xe, + RTL8366RB_LEDGROUP_FORCE = 0xf, + + __RTL8366RB_LEDGROUP_MODE_MAX +}; + +#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB_LEDS) + +struct rtl8366rb_led { + u8 port_num; + u8 led_group; + struct realtek_priv *priv; + struct led_classdev cdev; +}; + +int rtl8366rb_setup_leds(struct realtek_priv *priv); + +#else + +static inline int rtl8366rb_setup_leds(struct realtek_priv *priv) +{ + return 0; +} + +#endif /* IS_ENABLED(CONFIG_LEDS_CLASS) */ + +/** + * struct rtl8366rb - RTL8366RB-specific data + * @max_mtu: per-port max MTU setting + * @pvid_enabled: if PVID is set for respective port + * @leds: per-port and per-ledgroup led info + */ +struct rtl8366rb { + unsigned int max_mtu[RTL8366RB_NUM_PORTS]; + bool pvid_enabled[RTL8366RB_NUM_PORTS]; +#if IS_ENABLED(CONFIG_NET_DSA_REALTEK_RTL8366RB_LEDS) + struct rtl8366rb_led leds[RTL8366RB_NUM_PORTS][RTL8366RB_NUM_LEDGROUPS]; +#endif +}; + +/* This code is used also with LEDs disabled */ +int rb8366rb_set_ledgroup_mode(struct realtek_priv *priv, + u8 led_group, + enum rtl8366_ledgroup_mode mode); + +#endif /* _RTL8366RB_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index fe0e3e2a8117..71e50fc65c14 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -1441,7 +1441,9 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down) aq_ptp_ring_free(self); aq_ptp_free(self); - if (likely(self->aq_fw_ops->deinit) && link_down) { + /* May be invoked during hot unplug. */ + if (pci_device_is_present(self->pdev) && + likely(self->aq_fw_ops->deinit) && link_down) { mutex_lock(&self->fwreq_mutex); self->aq_fw_ops->deinit(self->aq_hw); mutex_unlock(&self->fwreq_mutex); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 0715ea5bf13e..3b082114f2e5 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -41,9 +41,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; + u32 phy_wolopts = 0; - if (dev->phydev) + if (dev->phydev) { phy_ethtool_get_wol(dev->phydev, wol); + phy_wolopts = wol->wolopts; + } /* MAC is not wake-up capable, return what the PHY does */ if (!device_can_wakeup(kdev)) @@ -51,9 +54,14 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) /* Overlay MAC capabilities with that of the PHY queried before */ wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; - wol->wolopts = priv->wolopts; - memset(wol->sopass, 0, sizeof(wol->sopass)); + wol->wolopts |= priv->wolopts; + /* Return the PHY configured magic password */ + if (phy_wolopts & WAKE_MAGICSECURE) + return; + + /* Otherwise the MAC one */ + memset(wol->sopass, 0, sizeof(wol->sopass)); if (wol->wolopts & WAKE_MAGICSECURE) memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass)); } @@ -70,7 +78,7 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) /* Try Wake-on-LAN from the PHY first */ if (dev->phydev) { ret = phy_ethtool_set_wol(dev->phydev, wol); - if (ret != -EOPNOTSUPP) + if (ret != -EOPNOTSUPP && wol->wolopts) return ret; } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1c94bf1db718..d9d675f1ebfe 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -55,6 +55,7 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/crc32poly.h> +#include <linux/dmi.h> #include <net/checksum.h> #include <net/gso.h> @@ -18212,6 +18213,50 @@ unlock: static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume); +/* Systems where ACPI _PTS (Prepare To Sleep) S5 will result in a fatal + * PCIe AER event on the tg3 device if the tg3 device is not, or cannot + * be, powered down. + */ +static const struct dmi_system_id tg3_restart_aer_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R440"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R540"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R640"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R650"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R740"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R750"), + }, + }, + {} +}; + static void tg3_shutdown(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); @@ -18228,6 +18273,19 @@ static void tg3_shutdown(struct pci_dev *pdev) if (system_state == SYSTEM_POWER_OFF) tg3_power_down(tp); + else if (system_state == SYSTEM_RESTART && + dmi_first_match(tg3_restart_aer_quirk_table) && + pdev->current_state != PCI_D3cold && + pdev->current_state != PCI_UNKNOWN) { + /* Disable PCIe AER on the tg3 to avoid a fatal + * error during this system restart. + */ + pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_CERE | + PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + } rtnl_unlock(); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 5740c98d8c9f..2847278d9cd4 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -1279,6 +1279,8 @@ struct macb { struct clk *rx_clk; struct clk *tsu_clk; struct net_device *dev; + /* Protects hw_stats and ethtool_stats */ + spinlock_t stats_lock; union { struct macb_stats macb; struct gem_stats gem; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 48496209fb16..c1f57d96e63f 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1978,10 +1978,12 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id) if (status & MACB_BIT(ISR_ROVR)) { /* We missed at least one packet */ + spin_lock(&bp->stats_lock); if (macb_is_gem(bp)) bp->hw_stats.gem.rx_overruns++; else bp->hw_stats.macb.rx_overruns++; + spin_unlock(&bp->stats_lock); if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) queue_writel(queue, ISR, MACB_BIT(ISR_ROVR)); @@ -3102,6 +3104,7 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) if (!netif_running(bp->dev)) return nstat; + spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); nstat->rx_errors = (hwstat->rx_frame_check_sequence_errors + @@ -3131,6 +3134,7 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) nstat->tx_aborted_errors = hwstat->tx_excessive_collisions; nstat->tx_carrier_errors = hwstat->tx_carrier_sense_errors; nstat->tx_fifo_errors = hwstat->tx_underrun; + spin_unlock_irq(&bp->stats_lock); return nstat; } @@ -3138,12 +3142,13 @@ static struct net_device_stats *gem_get_stats(struct macb *bp) static void gem_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { - struct macb *bp; + struct macb *bp = netdev_priv(dev); - bp = netdev_priv(dev); + spin_lock_irq(&bp->stats_lock); gem_update_stats(bp); memcpy(data, &bp->ethtool_stats, sizeof(u64) * (GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES)); + spin_unlock_irq(&bp->stats_lock); } static int gem_get_sset_count(struct net_device *dev, int sset) @@ -3193,6 +3198,7 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev) return gem_get_stats(bp); /* read stats from hardware */ + spin_lock_irq(&bp->stats_lock); macb_update_stats(bp); /* Convert HW stats into netdevice stats */ @@ -3226,6 +3232,7 @@ static struct net_device_stats *macb_get_stats(struct net_device *dev) nstat->tx_carrier_errors = hwstat->tx_carrier_errors; nstat->tx_fifo_errors = hwstat->tx_underruns; /* Don't know about heartbeat or window errors... */ + spin_unlock_irq(&bp->stats_lock); return nstat; } @@ -5097,6 +5104,7 @@ static int macb_probe(struct platform_device *pdev) } } spin_lock_init(&bp->lock); + spin_lock_init(&bp->stats_lock); /* setup capabilities */ macb_configure_caps(bp, macb_config); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 6a6fc819dfde..2106861463e4 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -167,6 +167,24 @@ static bool enetc_skb_is_tcp(struct sk_buff *skb) return skb->csum_offset == offsetof(struct tcphdr, check); } +/** + * enetc_unwind_tx_frame() - Unwind the DMA mappings of a multi-buffer Tx frame + * @tx_ring: Pointer to the Tx ring on which the buffer descriptors are located + * @count: Number of Tx buffer descriptors which need to be unmapped + * @i: Index of the last successfully mapped Tx buffer descriptor + */ +static void enetc_unwind_tx_frame(struct enetc_bdr *tx_ring, int count, int i) +{ + while (count--) { + struct enetc_tx_swbd *tx_swbd = &tx_ring->tx_swbd[i]; + + enetc_free_tx_frame(tx_ring, tx_swbd); + if (i == 0) + i = tx_ring->bd_count; + i--; + } +} + static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) { bool do_vlan, do_onestep_tstamp = false, do_twostep_tstamp = false; @@ -279,9 +297,11 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) } if (do_onestep_tstamp) { - u32 lo, hi, val; - u64 sec, nsec; + __be32 new_sec_l, new_nsec; + u32 lo, hi, nsec, val; + __be16 new_sec_h; u8 *data; + u64 sec; lo = enetc_rd_hot(hw, ENETC_SICTR0); hi = enetc_rd_hot(hw, ENETC_SICTR1); @@ -295,13 +315,38 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) /* Update originTimestamp field of Sync packet * - 48 bits seconds field * - 32 bits nanseconds field + * + * In addition, the UDP checksum needs to be updated + * by software after updating originTimestamp field, + * otherwise the hardware will calculate the wrong + * checksum when updating the correction field and + * update it to the packet. */ data = skb_mac_header(skb); - *(__be16 *)(data + offset2) = - htons((sec >> 32) & 0xffff); - *(__be32 *)(data + offset2 + 2) = - htonl(sec & 0xffffffff); - *(__be32 *)(data + offset2 + 6) = htonl(nsec); + new_sec_h = htons((sec >> 32) & 0xffff); + new_sec_l = htonl(sec & 0xffffffff); + new_nsec = htonl(nsec); + if (udp) { + struct udphdr *uh = udp_hdr(skb); + __be32 old_sec_l, old_nsec; + __be16 old_sec_h; + + old_sec_h = *(__be16 *)(data + offset2); + inet_proto_csum_replace2(&uh->check, skb, old_sec_h, + new_sec_h, false); + + old_sec_l = *(__be32 *)(data + offset2 + 2); + inet_proto_csum_replace4(&uh->check, skb, old_sec_l, + new_sec_l, false); + + old_nsec = *(__be32 *)(data + offset2 + 6); + inet_proto_csum_replace4(&uh->check, skb, old_nsec, + new_nsec, false); + } + + *(__be16 *)(data + offset2) = new_sec_h; + *(__be32 *)(data + offset2 + 2) = new_sec_l; + *(__be32 *)(data + offset2 + 6) = new_nsec; /* Configure single-step register */ val = ENETC_PM0_SINGLE_STEP_EN; @@ -372,25 +417,20 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) dma_err: dev_err(tx_ring->dev, "DMA map error"); - do { - tx_swbd = &tx_ring->tx_swbd[i]; - enetc_free_tx_frame(tx_ring, tx_swbd); - if (i == 0) - i = tx_ring->bd_count; - i--; - } while (count--); + enetc_unwind_tx_frame(tx_ring, count, i); return 0; } -static void enetc_map_tx_tso_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb, - struct enetc_tx_swbd *tx_swbd, - union enetc_tx_bd *txbd, int *i, int hdr_len, - int data_len) +static int enetc_map_tx_tso_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb, + struct enetc_tx_swbd *tx_swbd, + union enetc_tx_bd *txbd, int *i, int hdr_len, + int data_len) { union enetc_tx_bd txbd_tmp; u8 flags = 0, e_flags = 0; dma_addr_t addr; + int count = 1; enetc_clear_tx_bd(&txbd_tmp); addr = tx_ring->tso_headers_dma + *i * TSO_HEADER_SIZE; @@ -433,7 +473,10 @@ static void enetc_map_tx_tso_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb, /* Write the BD */ txbd_tmp.ext.e_flags = e_flags; *txbd = txbd_tmp; + count++; } + + return count; } static int enetc_map_tx_tso_data(struct enetc_bdr *tx_ring, struct sk_buff *skb, @@ -790,9 +833,9 @@ static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb /* compute the csum over the L4 header */ csum = enetc_tso_hdr_csum(&tso, skb, hdr, hdr_len, &pos); - enetc_map_tx_tso_hdr(tx_ring, skb, tx_swbd, txbd, &i, hdr_len, data_len); + count += enetc_map_tx_tso_hdr(tx_ring, skb, tx_swbd, txbd, + &i, hdr_len, data_len); bd_data_num = 0; - count++; while (data_len > 0) { int size; @@ -816,8 +859,13 @@ static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb err = enetc_map_tx_tso_data(tx_ring, skb, tx_swbd, txbd, tso.data, size, size == data_len); - if (err) + if (err) { + if (i == 0) + i = tx_ring->bd_count; + i--; + goto err_map_data; + } data_len -= size; count++; @@ -846,13 +894,7 @@ err_map_data: dev_err(tx_ring->dev, "DMA map error"); err_chained_bd: - do { - tx_swbd = &tx_ring->tx_swbd[i]; - enetc_free_tx_frame(tx_ring, tx_swbd); - if (i == 0) - i = tx_ring->bd_count; - i--; - } while (count--); + enetc_unwind_tx_frame(tx_ring, count, i); return 0; } @@ -1901,7 +1943,7 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring, enetc_xdp_drop(rx_ring, orig_i, i); tx_ring->stats.xdp_tx_drops++; } else { - tx_ring->stats.xdp_tx += xdp_tx_bd_cnt; + tx_ring->stats.xdp_tx++; rx_ring->xdp.xdp_tx_in_flight += xdp_tx_bd_cnt; xdp_tx_frm_cnt++; /* The XDP_TX enqueue was successful, so we @@ -3228,6 +3270,9 @@ static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr) new_offloads |= ENETC_F_TX_TSTAMP; break; case HWTSTAMP_TX_ONESTEP_SYNC: + if (!enetc_si_is_pf(priv->si)) + return -EOPNOTSUPP; + new_offloads &= ~ENETC_F_TX_TSTAMP_MASK; new_offloads |= ENETC_F_TX_ONESTEP_SYNC_TSTAMP; break; diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index fc41078c4f5d..73ac8c6afb3a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -672,7 +672,6 @@ err_link_init: err_alloc_msix: err_config_si: err_clk_get: - mutex_destroy(&priv->mm_lock); free_netdev(ndev); return err; @@ -684,6 +683,7 @@ static void enetc4_pf_netdev_destroy(struct enetc_si *si) struct net_device *ndev = si->ndev; unregister_netdev(ndev); + enetc4_link_deinit(priv); enetc_free_msix(priv); free_netdev(ndev); } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index bf34b5bb1e35..ece3ae28ba82 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -832,6 +832,7 @@ static int enetc_set_coalesce(struct net_device *ndev, static int enetc_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { + struct enetc_ndev_priv *priv = netdev_priv(ndev); int *phc_idx; phc_idx = symbol_get(enetc_phc_index); @@ -852,8 +853,10 @@ static int enetc_get_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_TX_SOFTWARE; info->tx_types = (1 << HWTSTAMP_TX_OFF) | - (1 << HWTSTAMP_TX_ON) | - (1 << HWTSTAMP_TX_ONESTEP_SYNC); + (1 << HWTSTAMP_TX_ON); + + if (enetc_si_is_pf(priv->si)) + info->tx_types |= (1 << HWTSTAMP_TX_ONESTEP_SYNC); info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_ALL); diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 8167cc5fb0df..78d2a19593d1 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -1116,6 +1116,16 @@ static inline u32 gve_xdp_tx_start_queue_id(struct gve_priv *priv) return gve_xdp_tx_queue_id(priv, 0); } +static inline bool gve_supports_xdp_xmit(struct gve_priv *priv) +{ + switch (priv->queue_format) { + case GVE_GQI_QPL_FORMAT: + return true; + default: + return false; + } +} + /* gqi napi handler defined in gve_main.c */ int gve_napi_poll(struct napi_struct *napi, int budget); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 533e659b15b3..92237fb0b60c 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1903,6 +1903,8 @@ static void gve_turndown(struct gve_priv *priv) /* Stop tx queues */ netif_tx_disable(priv->dev); + xdp_features_clear_redirect_target(priv->dev); + gve_clear_napi_enabled(priv); gve_clear_report_stats(priv); @@ -1972,6 +1974,9 @@ static void gve_turnup(struct gve_priv *priv) napi_schedule(&block->napi); } + if (priv->num_xdp_queues && gve_supports_xdp_xmit(priv)) + xdp_features_set_redirect_target(priv->dev, false); + gve_set_napi_enabled(priv); } @@ -2246,7 +2251,6 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv) if (priv->queue_format == GVE_GQI_QPL_FORMAT) { xdp_features = NETDEV_XDP_ACT_BASIC; xdp_features |= NETDEV_XDP_ACT_REDIRECT; - xdp_features |= NETDEV_XDP_ACT_NDO_XMIT; xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; } else { xdp_features = 0; diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 8ac0047f1ada..f0674a443567 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -109,10 +109,12 @@ static void gve_rx_reset_ring_dqo(struct gve_priv *priv, int idx) void gve_rx_stop_ring_dqo(struct gve_priv *priv, int idx) { int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); + struct gve_rx_ring *rx = &priv->rx[idx]; if (!gve_rx_was_added_to_block(priv, idx)) return; + page_pool_disable_direct_recycling(rx->dqo.page_pool); gve_remove_napi(priv, ntfy_idx); gve_rx_remove_from_block(priv, idx); gve_rx_reset_ring_dqo(priv, idx); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index e95ae0d39948..0676fc547b6f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2408,6 +2408,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) dma_addr_t data_dma_addr; struct netdev_queue *txq; unsigned long lpar_rc; + unsigned int skblen; union sub_crq tx_crq; unsigned int offset; bool use_scrq_send_direct = false; @@ -2522,6 +2523,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_buff->skb = skb; tx_buff->index = bufidx; tx_buff->pool_index = queue_num; + skblen = skb->len; memset(&tx_crq, 0, sizeof(tx_crq)); tx_crq.v1.first = IBMVNIC_CRQ_CMD; @@ -2614,7 +2616,7 @@ early_exit: netif_stop_subqueue(netdev, queue_num); } - tx_bytes += skb->len; + tx_bytes += skblen; txq_trans_cond_update(txq); ret = NETDEV_TX_OK; goto out; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2d7a18fcc3be..6faa62bced3a 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1983,7 +1983,7 @@ err: static void iavf_finish_config(struct work_struct *work) { struct iavf_adapter *adapter; - bool netdev_released = false; + bool locks_released = false; int pairs, err; adapter = container_of(work, struct iavf_adapter, finish_config); @@ -2012,19 +2012,22 @@ static void iavf_finish_config(struct work_struct *work) netif_set_real_num_tx_queues(adapter->netdev, pairs); if (adapter->netdev->reg_state != NETREG_REGISTERED) { + mutex_unlock(&adapter->crit_lock); netdev_unlock(adapter->netdev); - netdev_released = true; + locks_released = true; err = register_netdevice(adapter->netdev); if (err) { dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n", err); /* go back and try again.*/ + mutex_lock(&adapter->crit_lock); iavf_free_rss(adapter); iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); iavf_change_state(adapter, __IAVF_INIT_CONFIG_ADAPTER); + mutex_unlock(&adapter->crit_lock); goto out; } } @@ -2040,9 +2043,10 @@ static void iavf_finish_config(struct work_struct *work) } out: - mutex_unlock(&adapter->crit_lock); - if (!netdev_released) + if (!locks_released) { + mutex_unlock(&adapter->crit_lock); netdev_unlock(adapter->netdev); + } rtnl_unlock(); } @@ -2903,8 +2907,8 @@ static void iavf_watchdog_task(struct work_struct *work) } mutex_unlock(&adapter->crit_lock); - netdev_unlock(netdev); restart_watchdog: + netdev_unlock(netdev); if (adapter->state >= __IAVF_DOWN) queue_work(adapter->wq, &adapter->adminq_task); if (adapter->aq_required) diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index d116e2b10bce..dbdb83567364 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -981,6 +981,9 @@ static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv /* preallocate memory for ice_sched_node */ node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + *priv = node; return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c index fb527434b58b..d649c197cf67 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.c +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -38,8 +38,7 @@ static int ice_eswitch_setup_env(struct ice_pf *pf) if (ice_vsi_add_vlan_zero(uplink_vsi)) goto err_vlan_zero; - if (ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, true, - ICE_FLTR_RX)) + if (ice_set_dflt_vsi(uplink_vsi)) goto err_def_rx; if (ice_cfg_dflt_vsi(uplink_vsi->port_info, uplink_vsi->idx, true, diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index b83f99c01d91..8aabf7749aa5 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -36,6 +36,7 @@ static void ice_free_vf_entries(struct ice_pf *pf) hash_for_each_safe(vfs->table, bkt, tmp, vf, entry) { hash_del_rcu(&vf->entry); + ice_deinitialize_vf_entry(vf); ice_put_vf(vf); } } @@ -193,10 +194,6 @@ void ice_free_vfs(struct ice_pf *pf) wr32(hw, GLGEN_VFLRSTAT(reg_idx), BIT(bit_idx)); } - /* clear malicious info since the VF is getting released */ - if (!ice_is_feature_supported(pf, ICE_F_MBX_LIMIT)) - list_del(&vf->mbx_info.list_entry); - mutex_unlock(&vf->cfg_lock); } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 5d2d7736fd5f..9c9ea4c1b93b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -527,15 +527,14 @@ err: * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action - * @rx_buf: Rx buffer to store the XDP action * @eop_desc: Last descriptor in packet to read metadata from * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ -static void +static u32 ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, - struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc) + union ice_32b_rx_flex_desc *eop_desc) { unsigned int ret = ICE_XDP_PASS; u32 act; @@ -574,7 +573,7 @@ out_failure: ret = ICE_XDP_CONSUMED; } exit: - ice_set_rx_bufs_act(xdp, rx_ring, ret); + return ret; } /** @@ -860,10 +859,8 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, xdp_buff_set_frags_flag(xdp); } - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { - ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); + if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) return -ENOMEM; - } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); @@ -924,7 +921,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) @@ -941,6 +937,31 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, } /** + * ice_get_pgcnts - grab page_count() for gathered fragments + * @rx_ring: Rx descriptor ring to store the page counts on + * + * This function is intended to be called right before running XDP + * program so that the page recycling mechanism will be able to take + * a correct decision regarding underlying pages; this is done in such + * way as XDP program can change the refcount of page + */ +static void ice_get_pgcnts(struct ice_rx_ring *rx_ring) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + struct ice_rx_buf *rx_buf; + u32 cnt = rx_ring->count; + + for (int i = 0; i < nr_frags; i++) { + rx_buf = &rx_ring->rx_buf[idx]; + rx_buf->pgcnt = page_count(rx_buf->page); + + if (++idx == cnt) + idx = 0; + } +} + +/** * ice_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on * @xdp: xdp_buff pointing to the data @@ -1051,12 +1072,12 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) rx_buf->page_offset + headlen, size, xdp->frame_sz); } else { - /* buffer is unused, change the act that should be taken later - * on; data was copied onto skb's linear part so there's no + /* buffer is unused, restore biased page count in Rx buffer; + * data was copied onto skb's linear part so there's no * need for adjusting page offset and we can reuse this buffer * as-is */ - rx_buf->act = ICE_SKB_CONSUMED; + rx_buf->pagecnt_bias++; } if (unlikely(xdp_buff_has_frags(xdp))) { @@ -1104,6 +1125,65 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) } /** + * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags + * @rx_ring: Rx ring with all the auxiliary data + * @xdp: XDP buffer carrying linear + frags part + * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage + * @ntc: a current next_to_clean value to be stored at rx_ring + * @verdict: return code from XDP program execution + * + * Walk through gathered fragments and satisfy internal page + * recycle mechanism; we take here an action related to verdict + * returned by XDP program; + */ +static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, + u32 *xdp_xmit, u32 ntc, u32 verdict) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + u32 cnt = rx_ring->count; + u32 post_xdp_frags = 1; + struct ice_rx_buf *buf; + int i; + + if (unlikely(xdp_buff_has_frags(xdp))) + post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags; + + for (i = 0; i < post_xdp_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + + if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + *xdp_xmit |= verdict; + } else if (verdict & ICE_XDP_CONSUMED) { + buf->pagecnt_bias++; + } else if (verdict == ICE_XDP_PASS) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + } + + ice_put_rx_buf(rx_ring, buf); + + if (++idx == cnt) + idx = 0; + } + /* handle buffers that represented frags released by XDP prog; + * for these we keep pagecnt_bias as-is; refcount from struct page + * has been decremented within XDP prog and we do not have to increase + * the biased refcnt + */ + for (; i < nr_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + ice_put_rx_buf(rx_ring, buf); + if (++idx == cnt) + idx = 0; + } + + xdp->data = NULL; + rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; +} + +/** * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: Rx descriptor ring to transact packets on * @budget: Total limit on number of packets to process @@ -1120,15 +1200,13 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_pkts = 0; unsigned int offset = rx_ring->rx_offset; struct xdp_buff *xdp = &rx_ring->xdp; - u32 cached_ntc = rx_ring->first_desc; struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; + u32 cached_ntu, xdp_verdict; u32 cnt = rx_ring->count; u32 xdp_xmit = 0; - u32 cached_ntu; bool failure; - u32 first; xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { @@ -1190,6 +1268,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { + ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED); break; } if (++ntc == cnt) @@ -1199,15 +1278,15 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); - if (rx_buf->act == ICE_XDP_PASS) + ice_get_pgcnts(rx_ring); + xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); + if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + continue; construct_skb: if (likely(ice_ring_uses_build_skb(rx_ring))) @@ -1217,18 +1296,12 @@ construct_skb: /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->ring_stats->rx_stats.alloc_page_failed++; - rx_buf->act = ICE_XDP_CONSUMED; - if (unlikely(xdp_buff_has_frags(xdp))) - ice_set_rx_bufs_act(xdp, rx_ring, - ICE_XDP_CONSUMED); - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; - break; + xdp_verdict = ICE_XDP_CONSUMED; } - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + + if (!skb) + break; stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, @@ -1257,23 +1330,6 @@ construct_skb: total_rx_pkts++; } - first = rx_ring->first_desc; - while (cached_ntc != first) { - struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc]; - - if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - xdp_xmit |= buf->act; - } else if (buf->act & ICE_XDP_CONSUMED) { - buf->pagecnt_bias++; - } else if (buf->act == ICE_XDP_PASS) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - } - - ice_put_rx_buf(rx_ring, buf); - if (++cached_ntc >= cnt) - cached_ntc = 0; - } rx_ring->next_to_clean = ntc; /* return up to cleaned_count buffers to hardware */ failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index cb347c852ba9..806bce701df3 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -201,7 +201,6 @@ struct ice_rx_buf { struct page *page; unsigned int page_offset; unsigned int pgcnt; - unsigned int act; unsigned int pagecnt_bias; }; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 79f960c6680d..6cf32b404127 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -6,49 +6,6 @@ #include "ice.h" /** - * ice_set_rx_bufs_act - propagate Rx buffer action to frags - * @xdp: XDP buffer representing frame (linear and frags part) - * @rx_ring: Rx ring struct - * act: action to store onto Rx buffers related to XDP buffer parts - * - * Set action that should be taken before putting Rx buffer from first frag - * to the last. - */ -static inline void -ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring, - const unsigned int act) -{ - u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; - u32 nr_frags = rx_ring->nr_frags + 1; - u32 idx = rx_ring->first_desc; - u32 cnt = rx_ring->count; - struct ice_rx_buf *buf; - - for (int i = 0; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[idx]; - buf->act = act; - - if (++idx == cnt) - idx = 0; - } - - /* adjust pagecnt_bias on frags freed by XDP prog */ - if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) { - u32 delta = rx_ring->nr_frags - sinfo_frags; - - while (delta) { - if (idx == 0) - idx = cnt - 1; - else - idx--; - buf = &rx_ring->rx_buf[idx]; - buf->pagecnt_bias--; - delta--; - } - } -} - -/** * ice_test_staterr - tests bits in Rx descriptor status and error fields * @status_err_n: Rx descriptor status_error0 or status_error1 bits * @stat_err_bits: value to mask diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index c7c0c2f50c26..815ad0bfe832 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -1036,6 +1036,14 @@ void ice_initialize_vf_entry(struct ice_vf *vf) mutex_init(&vf->cfg_lock); } +void ice_deinitialize_vf_entry(struct ice_vf *vf) +{ + struct ice_pf *pf = vf->pf; + + if (!ice_is_feature_supported(pf, ICE_F_MBX_LIMIT)) + list_del(&vf->mbx_info.list_entry); +} + /** * ice_dis_vf_qs - Disable the VF queues * @vf: pointer to the VF structure diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h index 0c7e77c0a09f..5392b0404986 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib_private.h @@ -24,6 +24,7 @@ #endif void ice_initialize_vf_entry(struct ice_vf *vf); +void ice_deinitialize_vf_entry(struct ice_vf *vf); void ice_dis_vf_qs(struct ice_vf *vf); int ice_check_vf_init(struct ice_vf *vf); enum virtchnl_status_code ice_err_to_virt_err(int err); diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index b4fbb99bfad2..a3d6b8f198a8 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -2159,8 +2159,13 @@ static int idpf_open(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); + err = idpf_set_real_num_queues(vport); + if (err) + goto unlock; + err = idpf_vport_open(vport); +unlock: idpf_vport_ctrl_unlock(netdev); return err; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 2fa9c36e33c9..977741c41498 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3008,14 +3008,11 @@ static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb, return -EINVAL; rsc_segments = DIV_ROUND_UP(skb->data_len, rsc_seg_len); - if (unlikely(rsc_segments == 1)) - return 0; NAPI_GRO_CB(skb)->count = rsc_segments; skb_shinfo(skb)->gso_size = rsc_seg_len; skb_reset_network_header(skb); - len = skb->len - skb_transport_offset(skb); if (ipv4) { struct iphdr *ipv4h = ip_hdr(skb); @@ -3024,6 +3021,7 @@ static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb, /* Reset and set transport header offset in skb */ skb_set_transport_header(skb, sizeof(struct iphdr)); + len = skb->len - skb_transport_offset(skb); /* Compute the TCP pseudo header checksum*/ tcp_hdr(skb)->check = @@ -3033,6 +3031,7 @@ static int idpf_rx_rsc(struct idpf_rx_queue *rxq, struct sk_buff *skb, skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + len = skb->len - skb_transport_offset(skb); tcp_hdr(skb)->check = ~tcp_v6_check(len, &ipv6h->saddr, &ipv6h->daddr, 0); } @@ -3072,6 +3071,7 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, idpf_rx_hash(rxq, skb, rx_desc, decoded); skb->protocol = eth_type_trans(skb, rxq->netdev); + skb_record_rx_queue(skb, rxq->idx); if (le16_get_bits(rx_desc->hdrlen_flags, VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M)) @@ -3080,8 +3080,6 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, csum_bits = idpf_rx_splitq_extract_csum_bits(rx_desc); idpf_rx_csum(rxq, skb, csum_bits, decoded); - skb_record_rx_queue(skb, rxq->idx); - return 0; } diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 56a35d58e7a6..84307bb7313e 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1096,6 +1096,7 @@ static int igc_init_empty_frame(struct igc_ring *ring, return -ENOMEM; } + buffer->type = IGC_TX_BUFFER_TYPE_SKB; buffer->skb = skb; buffer->protocol = 0; buffer->bytecount = skb->len; @@ -2701,8 +2702,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget) } static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, - struct xdp_buff *xdp) + struct igc_xdp_buff *ctx) { + struct xdp_buff *xdp = &ctx->xdp; unsigned int totalsize = xdp->data_end - xdp->data_meta; unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; @@ -2721,27 +2723,28 @@ static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring, __skb_pull(skb, metasize); } + if (ctx->rx_ts) { + skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP_NETDEV; + skb_hwtstamps(skb)->netdev_data = ctx->rx_ts; + } + return skb; } static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector, union igc_adv_rx_desc *desc, - struct xdp_buff *xdp, - ktime_t timestamp) + struct igc_xdp_buff *ctx) { struct igc_ring *ring = q_vector->rx.ring; struct sk_buff *skb; - skb = igc_construct_skb_zc(ring, xdp); + skb = igc_construct_skb_zc(ring, ctx); if (!skb) { ring->rx_stats.alloc_failed++; set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &ring->flags); return; } - if (timestamp) - skb_hwtstamps(skb)->hwtstamp = timestamp; - if (igc_cleanup_headers(ring, desc, skb)) return; @@ -2777,7 +2780,6 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) union igc_adv_rx_desc *desc; struct igc_rx_buffer *bi; struct igc_xdp_buff *ctx; - ktime_t timestamp = 0; unsigned int size; int res; @@ -2807,6 +2809,8 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) */ bi->xdp->data_meta += IGC_TS_HDR_LEN; size -= IGC_TS_HDR_LEN; + } else { + ctx->rx_ts = NULL; } bi->xdp->data_end = bi->xdp->data + size; @@ -2815,7 +2819,7 @@ static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget) res = __igc_xdp_run_prog(adapter, prog, bi->xdp); switch (res) { case IGC_XDP_PASS: - igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp); + igc_dispatch_skb_zc(q_vector, desc, ctx); fallthrough; case IGC_XDP_CONSUMED: xsk_buff_free(bi->xdp); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index 683c668672d6..cb07ecd8937d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -1122,7 +1122,7 @@ static bool ixgbe_is_media_cage_present(struct ixgbe_hw *hw) * returns error (ENOENT), then no cage present. If no cage present then * connection type is backplane or BASE-T. */ - return ixgbe_aci_get_netlist_node(hw, cmd, NULL, NULL); + return !ixgbe_aci_get_netlist_node(hw, cmd, NULL, NULL); } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7236f20c9a30..467f81239e12 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2105,7 +2105,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, /* hand second half of page back to the ring */ ixgbe_reuse_rx_page(rx_ring, rx_buffer); } else { - if (!IS_ERR(skb) && IXGBE_CB(skb)->dma == rx_buffer->dma) { + if (skb && IXGBE_CB(skb)->dma == rx_buffer->dma) { /* the page has been released from the ring */ IXGBE_CB(skb)->page_released = true; } else { diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c index 1641791a2d5b..8ed83fb98862 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c @@ -324,7 +324,7 @@ static const struct mvpp2_cls_flow cls_flows[MVPP2_N_PRS_FLOWS] = { MVPP2_PRS_RI_VLAN_MASK), /* Non IP flow, with vlan tag */ MVPP2_DEF_FLOW(MVPP22_FLOW_ETHERNET, MVPP2_FL_NON_IP_TAG, - MVPP22_CLS_HEK_OPT_VLAN, + MVPP22_CLS_HEK_TAGGED, 0, 0), }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 8b7c843446e1..823c1ba456cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -564,6 +564,9 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport, struct mlx5_esw_sched_ return err; esw_qos_normalize_min_rate(parent->esw, parent, extack); + trace_mlx5_esw_vport_qos_create(vport->dev, vport, + vport->qos.sched_node->max_rate, + vport->qos.sched_node->bw_share); return 0; } @@ -591,8 +594,11 @@ static int mlx5_esw_qos_vport_enable(struct mlx5_vport *vport, enum sched_node_t sched_node->vport = vport; vport->qos.sched_node = sched_node; err = esw_qos_vport_enable(vport, parent, extack); - if (err) + if (err) { + __esw_qos_free_node(sched_node); esw_qos_put(esw); + vport->qos.sched_node = NULL; + } return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 7db9cab9bedf..d9362eabc6a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -572,7 +572,7 @@ irq_pool_alloc(struct mlx5_core_dev *dev, int start, int size, char *name, pool->min_threshold = min_threshold * MLX5_EQ_REFS_PER_IRQ; pool->max_threshold = max_threshold * MLX5_EQ_REFS_PER_IRQ; mlx5_core_dbg(dev, "pool->name = %s, pool->size = %d, pool->start = %d", - name, size, start); + name ? name : "mlx5_pcif_pool", size, start); return pool; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 2bed8c86b7cf..3f64cdbabfa3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -768,7 +768,9 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp); if (err) return; - mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); + err = mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); + if (err) + return; for (i = 0; i < len; i++) { data[data_index + i] = hw_stats[i].getter(ppcnt_pl); if (!hw_stats[i].cells_bytes) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index 2ec62c8d86e1..59486fe2ad18 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -20,6 +20,8 @@ nfp_bpf_cmsg_alloc(struct nfp_app_bpf *bpf, unsigned int size) struct sk_buff *skb; skb = nfp_app_ctrl_msg_alloc(bpf->app, size, GFP_KERNEL); + if (!skb) + return NULL; skb_put(skb, size); return skb; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index bfe6e2d631bd..f5acfb7d4ff6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -516,6 +516,19 @@ static int loongson_dwmac_acpi_config(struct pci_dev *pdev, return 0; } +/* Loongson's DWMAC device may take nearly two seconds to complete DMA reset */ +static int loongson_dwmac_fix_reset(void *priv, void __iomem *ioaddr) +{ + u32 value = readl(ioaddr + DMA_BUS_MODE); + + value |= DMA_BUS_MODE_SFT_RESET; + writel(value, ioaddr + DMA_BUS_MODE); + + return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, + !(value & DMA_BUS_MODE_SFT_RESET), + 10000, 2000000); +} + static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct plat_stmmacenet_data *plat; @@ -566,6 +579,7 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id plat->bsp_priv = ld; plat->setup = loongson_dwmac_setup; + plat->fix_soc_reset = loongson_dwmac_fix_reset; ld->dev = &pdev->dev; ld->loongson_id = readl(res.addr + GMAC_VERSION) & 0xff; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d04543e5697b..c0ae7db96f46 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2094,6 +2094,11 @@ static int __alloc_dma_rx_desc_resources(struct stmmac_priv *priv, pp_params.offset = stmmac_rx_offset(priv); pp_params.max_len = dma_conf->dma_buf_sz; + if (priv->sph) { + pp_params.offset = 0; + pp_params.max_len += stmmac_rx_offset(priv); + } + rx_q->page_pool = page_pool_create(&pp_params); if (IS_ERR(rx_q->page_pool)) { ret = PTR_ERR(rx_q->page_pool); @@ -2424,6 +2429,11 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 chan = 0; u8 qmode = 0; + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + /* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */ if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { rxfifosz /= rx_channels_count; @@ -2892,6 +2902,11 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, int rxfifosz = priv->plat->rx_fifo_size; int txfifosz = priv->plat->tx_fifo_size; + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + /* Adjust for real per queue fifo size */ rxfifosz /= rx_channels_count; txfifosz /= tx_channels_count; @@ -5868,6 +5883,9 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) const int mtu = new_mtu; int ret; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + txfifosz /= priv->plat->tx_queues_to_use; if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) { @@ -7219,29 +7237,15 @@ static int stmmac_hw_init(struct stmmac_priv *priv) priv->plat->tx_queues_to_use = priv->dma_cap.number_tx_queues; } - if (!priv->plat->rx_fifo_size) { - if (priv->dma_cap.rx_fifo_size) { - priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; - } else { - dev_err(priv->device, "Can't specify Rx FIFO size\n"); - return -ENODEV; - } - } else if (priv->dma_cap.rx_fifo_size && - priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { + if (priv->dma_cap.rx_fifo_size && + priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { dev_warn(priv->device, "Rx FIFO size (%u) exceeds dma capability\n", priv->plat->rx_fifo_size); priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; } - if (!priv->plat->tx_fifo_size) { - if (priv->dma_cap.tx_fifo_size) { - priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size; - } else { - dev_err(priv->device, "Can't specify Tx FIFO size\n"); - return -ENODEV; - } - } else if (priv->dma_cap.tx_fifo_size && - priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { + if (priv->dma_cap.tx_fifo_size && + priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { dev_warn(priv->device, "Tx FIFO size (%u) exceeds dma capability\n", priv->plat->tx_fifo_size); diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 0d5a862cd78a..3a13d60a947a 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -99,6 +99,7 @@ config TI_K3_AM65_CPSW_NUSS select NET_DEVLINK select TI_DAVINCI_MDIO select PHYLINK + select PAGE_POOL select TI_K3_CPPI_DESC_POOL imply PHY_TI_GMII_SEL depends on TI_K3_AM65_CPTS || !TI_K3_AM65_CPTS diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index b663271e79f7..2806238629f8 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -828,21 +828,30 @@ static void am65_cpsw_nuss_xmit_free(struct am65_cpsw_tx_chn *tx_chn, static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma) { struct am65_cpsw_tx_chn *tx_chn = data; + enum am65_cpsw_tx_buf_type buf_type; struct cppi5_host_desc_t *desc_tx; + struct xdp_frame *xdpf; struct sk_buff *skb; void **swdata; desc_tx = k3_cppi_desc_pool_dma2virt(tx_chn->desc_pool, desc_dma); swdata = cppi5_hdesc_get_swdata(desc_tx); - skb = *(swdata); - am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); + buf_type = am65_cpsw_nuss_buf_type(tx_chn, desc_dma); + if (buf_type == AM65_CPSW_TX_BUF_TYPE_SKB) { + skb = *(swdata); + dev_kfree_skb_any(skb); + } else { + xdpf = *(swdata); + xdp_return_frame(xdpf); + } - dev_kfree_skb_any(skb); + am65_cpsw_nuss_xmit_free(tx_chn, desc_tx); } static struct sk_buff *am65_cpsw_build_skb(void *page_addr, struct net_device *ndev, - unsigned int len) + unsigned int len, + unsigned int headroom) { struct sk_buff *skb; @@ -852,7 +861,7 @@ static struct sk_buff *am65_cpsw_build_skb(void *page_addr, if (unlikely(!skb)) return NULL; - skb_reserve(skb, AM65_CPSW_HEADROOM); + skb_reserve(skb, headroom); skb->dev = ndev; return skb; @@ -1169,9 +1178,11 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, struct xdp_frame *xdpf; struct bpf_prog *prog; struct page *page; + int pkt_len; u32 act; int err; + pkt_len = *len; prog = READ_ONCE(port->xdp_prog); if (!prog) return AM65_CPSW_XDP_PASS; @@ -1189,8 +1200,10 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, netif_txq = netdev_get_tx_queue(ndev, tx_chn->id); xdpf = xdp_convert_buff_to_frame(xdp); - if (unlikely(!xdpf)) + if (unlikely(!xdpf)) { + ndev->stats.tx_dropped++; goto drop; + } __netif_tx_lock(netif_txq, cpu); err = am65_cpsw_xdp_tx_frame(ndev, tx_chn, xdpf, @@ -1199,14 +1212,14 @@ static int am65_cpsw_run_xdp(struct am65_cpsw_rx_flow *flow, if (err) goto drop; - dev_sw_netstats_tx_add(ndev, 1, *len); + dev_sw_netstats_rx_add(ndev, pkt_len); ret = AM65_CPSW_XDP_CONSUMED; goto out; case XDP_REDIRECT: if (unlikely(xdp_do_redirect(ndev, xdp, prog))) goto drop; - dev_sw_netstats_rx_add(ndev, *len); + dev_sw_netstats_rx_add(ndev, pkt_len); ret = AM65_CPSW_XDP_REDIRECT; goto out; default: @@ -1315,16 +1328,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, dev_dbg(dev, "%s rx csum_info:%#x\n", __func__, csum_info); dma_unmap_single(rx_chn->dma_dev, buf_dma, buf_dma_len, DMA_FROM_DEVICE); - k3_cppi_desc_pool_free(rx_chn->desc_pool, desc_rx); - skb = am65_cpsw_build_skb(page_addr, ndev, - AM65_CPSW_MAX_PACKET_SIZE); - if (unlikely(!skb)) { - new_page = page; - goto requeue; - } - if (port->xdp_prog) { xdp_init_buff(&xdp, PAGE_SIZE, &port->xdp_rxq[flow->id]); xdp_prepare_buff(&xdp, page_addr, AM65_CPSW_HEADROOM, @@ -1334,9 +1339,16 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_rx_flow *flow, if (*xdp_state != AM65_CPSW_XDP_PASS) goto allocate; - /* Compute additional headroom to be reserved */ - headroom = (xdp.data - xdp.data_hard_start) - skb_headroom(skb); - skb_reserve(skb, headroom); + headroom = xdp.data - xdp.data_hard_start; + } else { + headroom = AM65_CPSW_HEADROOM; + } + + skb = am65_cpsw_build_skb(page_addr, ndev, + AM65_CPSW_MAX_PACKET_SIZE, headroom); + if (unlikely(!skb)) { + new_page = page; + goto requeue; } ndev_priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 768578c0d958..d59c1744840a 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -474,26 +474,7 @@ static int icss_iep_perout_enable_hw(struct icss_iep *iep, static int icss_iep_perout_enable(struct icss_iep *iep, struct ptp_perout_request *req, int on) { - int ret = 0; - - mutex_lock(&iep->ptp_clk_mutex); - - if (iep->pps_enabled) { - ret = -EBUSY; - goto exit; - } - - if (iep->perout_enabled == !!on) - goto exit; - - ret = icss_iep_perout_enable_hw(iep, req, on); - if (!ret) - iep->perout_enabled = !!on; - -exit: - mutex_unlock(&iep->ptp_clk_mutex); - - return ret; + return -EOPNOTSUPP; } static void icss_iep_cap_cmp_work(struct work_struct *work) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 9e7fa012e4fa..f33178f90c42 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -2897,6 +2897,7 @@ static int axienet_probe(struct platform_device *pdev) lp->phylink_config.dev = &ndev->dev; lp->phylink_config.type = PHYLINK_NETDEV; + lp->phylink_config.mac_managed_pm = true; lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE | MAC_10FD | MAC_100FD | MAC_1000FD; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 642155cb8315..dbb3960126ee 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1902,21 +1902,9 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *geneve, *next; - struct net_device *dev, *aux; - /* gather any geneve devices that were moved into this ns */ - for_each_netdev_safe(net, dev, aux) - if (dev->rtnl_link_ops == &geneve_link_ops) - unregister_netdevice_queue(dev, head); - - /* now gather any other geneve devices that were created in this ns */ - list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) { - /* If geneve->dev is in the same netns, it was already added - * to the list by the previous loop. - */ - if (!net_eq(dev_net(geneve->dev), net)) - unregister_netdevice_queue(geneve->dev, head); - } + list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) + geneve_dellink(geneve->dev, head); } static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list, diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index d64740bf44ed..b7b46c5e6399 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -2481,11 +2481,6 @@ static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list, list_for_each_entry(net, net_list, exit_list) { struct gtp_net *gn = net_generic(net, gtp_net_id); struct gtp_dev *gtp, *gtp_next; - struct net_device *dev; - - for_each_netdev(net, dev) - if (dev->rtnl_link_ops == >p_link_ops) - gtp_dellink(dev, dev_to_kill); list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list) gtp_dellink(gtp->dev, dev_to_kill); diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index fd591ddb3884..ca62188a317a 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -416,20 +416,25 @@ struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, static noinline_for_stack int ipvlan_process_v4_outbound(struct sk_buff *skb) { - const struct iphdr *ip4h = ip_hdr(skb); struct net_device *dev = skb->dev; struct net *net = dev_net(dev); - struct rtable *rt; int err, ret = NET_XMIT_DROP; + const struct iphdr *ip4h; + struct rtable *rt; struct flowi4 fl4 = { .flowi4_oif = dev->ifindex, - .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)), .flowi4_flags = FLOWI_FLAG_ANYSRC, .flowi4_mark = skb->mark, - .daddr = ip4h->daddr, - .saddr = ip4h->saddr, }; + if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) + goto err; + + ip4h = ip_hdr(skb); + fl4.daddr = ip4h->daddr; + fl4.saddr = ip4h->saddr; + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)); + rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto err; @@ -488,6 +493,12 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb) struct net_device *dev = skb->dev; int err, ret = NET_XMIT_DROP; + if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) { + DEV_STATS_INC(dev, tx_errors); + kfree_skb(skb); + return ret; + } + err = ipvlan_route_v6_outbound(dev, skb); if (unlikely(err)) { DEV_STATS_INC(dev, tx_errors); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index c8840c3b9a1b..f1d68153987e 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -244,8 +244,22 @@ static netdev_tx_t blackhole_netdev_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static int blackhole_neigh_output(struct neighbour *n, struct sk_buff *skb) +{ + kfree_skb(skb); + return 0; +} + +static int blackhole_neigh_construct(struct net_device *dev, + struct neighbour *n) +{ + n->output = blackhole_neigh_output; + return 0; +} + static const struct net_device_ops blackhole_netdev_ops = { .ndo_start_xmit = blackhole_netdev_xmit, + .ndo_neigh_construct = blackhole_neigh_construct, }; /* This is a dst-dummy device used specifically for invalidated diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 5c80fbee7913..7ab358616e03 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -184,9 +184,11 @@ static const struct ethtool_ops nsim_ethtool_ops = { static void nsim_ethtool_ring_init(struct netdevsim *ns) { + ns->ethtool.ring.rx_pending = 512; ns->ethtool.ring.rx_max_pending = 4096; ns->ethtool.ring.rx_jumbo_max_pending = 4096; ns->ethtool.ring.rx_mini_max_pending = 4096; + ns->ethtool.ring.tx_pending = 512; ns->ethtool.ring.tx_max_pending = 4096; } diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 214b62fba991..b00a315de060 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2265,12 +2265,15 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy, /* Allow the MAC to stop its clock if the PHY has the capability */ pl->mac_tx_clk_stop = phy_eee_tx_clock_stop_capable(phy) > 0; - /* Explicitly configure whether the PHY is allowed to stop it's - * receive clock. - */ - ret = phy_eee_rx_clock_stop(phy, pl->config->eee_rx_clk_stop_enable); - if (ret == -EOPNOTSUPP) - ret = 0; + if (pl->mac_supports_eee_ops) { + /* Explicitly configure whether the PHY is allowed to stop it's + * receive clock. + */ + ret = phy_eee_rx_clock_stop(phy, + pl->config->eee_rx_clk_stop_enable); + if (ret == -EOPNOTSUPP) + ret = 0; + } return ret; } diff --git a/drivers/net/phy/qcom/qca807x.c b/drivers/net/phy/qcom/qca807x.c index 3279de857b47..2ad8c2586d64 100644 --- a/drivers/net/phy/qcom/qca807x.c +++ b/drivers/net/phy/qcom/qca807x.c @@ -774,7 +774,7 @@ static int qca807x_config_init(struct phy_device *phydev) control_dac &= ~QCA807X_CONTROL_DAC_MASK; if (!priv->dac_full_amplitude) control_dac |= QCA807X_CONTROL_DAC_DSP_AMPLITUDE; - if (!priv->dac_full_amplitude) + if (!priv->dac_full_bias_current) control_dac |= QCA807X_CONTROL_DAC_DSP_BIAS_CURRENT; if (!priv->dac_disable_bias_current_tweak) control_dac |= QCA807X_CONTROL_DAC_BIAS_CURRENT_TWEAK; diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c index fc9e23927b3b..7d60a714ca53 100644 --- a/drivers/net/pse-pd/pd692x0.c +++ b/drivers/net/pse-pd/pd692x0.c @@ -1047,7 +1047,7 @@ static int pd692x0_pi_get_pw_limit(struct pse_controller_dev *pcdev, if (ret < 0) return ret; - return pd692x0_pi_get_pw_from_table(buf.data[2], buf.data[3]); + return pd692x0_pi_get_pw_from_table(buf.data[0], buf.data[1]); } static int pd692x0_pi_set_pw_limit(struct pse_controller_dev *pcdev, diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index 4f2a54afc4d0..4602e26eb8c8 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -319,7 +319,7 @@ static int pse_pi_get_current_limit(struct regulator_dev *rdev) goto out; mW = ret; - ret = pse_pi_get_voltage(rdev); + ret = _pse_pi_get_voltage(rdev); if (!ret) { dev_err(pcdev->dev, "Voltage null\n"); ret = -ERANGE; @@ -356,7 +356,7 @@ static int pse_pi_set_current_limit(struct regulator_dev *rdev, int min_uA, id = rdev_get_id(rdev); mutex_lock(&pcdev->lock); - ret = pse_pi_get_voltage(rdev); + ret = _pse_pi_get_voltage(rdev); if (!ret) { dev_err(pcdev->dev, "Voltage null\n"); ret = -ERANGE; diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index dc7cbd6a9798..f4019815f473 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -2639,7 +2639,9 @@ int team_nl_options_set_doit(struct sk_buff *skb, struct genl_info *info) ctx.data.u32_val = nla_get_u32(attr_data); break; case TEAM_OPTION_TYPE_STRING: - if (nla_len(attr_data) > TEAM_STRING_MAX_LEN) { + if (nla_len(attr_data) > TEAM_STRING_MAX_LEN || + !memchr(nla_data(attr_data), '\0', + nla_len(attr_data))) { err = -EINVAL; goto team_put; } diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 28624cca91f8..acf96f262488 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -574,18 +574,14 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, return ret; } -static inline bool tun_capable(struct tun_struct *tun) +static inline bool tun_not_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); struct net *net = dev_net(tun->dev); - if (ns_capable(net->user_ns, CAP_NET_ADMIN)) - return 1; - if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner)) - return 1; - if (gid_valid(tun->group) && in_egroup_p(tun->group)) - return 1; - return 0; + return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || + (gid_valid(tun->group) && !in_egroup_p(tun->group))) && + !ns_capable(net->user_ns, CAP_NET_ADMIN); } static void tun_set_real_num_queues(struct tun_struct *tun) @@ -2782,7 +2778,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) !!(tun->flags & IFF_MULTI_QUEUE)) return -EINVAL; - if (!tun_capable(tun)) + if (tun_not_capable(tun)) return -EPERM; err = security_tun_dev_open(tun->security); if (err < 0) diff --git a/drivers/net/usb/gl620a.c b/drivers/net/usb/gl620a.c index 46af78caf457..0bfa37c14059 100644 --- a/drivers/net/usb/gl620a.c +++ b/drivers/net/usb/gl620a.c @@ -179,9 +179,7 @@ static int genelink_bind(struct usbnet *dev, struct usb_interface *intf) { dev->hard_mtu = GL_RCV_BUF_SIZE; dev->net->hard_header_len += 4; - dev->in = usb_rcvbulkpipe(dev->udev, dev->driver_info->in); - dev->out = usb_sndbulkpipe(dev->udev, dev->driver_info->out); - return 0; + return usbnet_get_endpoints(dev, intf); } static const struct driver_info genelink_info = { diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c index 1341374a4588..616ecc38d172 100644 --- a/drivers/net/vmxnet3/vmxnet3_xdp.c +++ b/drivers/net/vmxnet3/vmxnet3_xdp.c @@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter) if (likely(cpu < tq_number)) tq = &adapter->tx_queue[cpu]; else - tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)]; + tq = &adapter->tx_queue[cpu % tq_number]; return tq; } @@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, u32 buf_size; u32 dw2; + spin_lock_irq(&tq->tx_lock); dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; dw2 |= xdpf->len; ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill; @@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) { tq->stats.tx_ring_full++; + spin_unlock_irq(&tq->tx_lock); return -ENOSPC; } @@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, tbi->dma_addr = dma_map_single(&adapter->pdev->dev, xdpf->data, buf_size, DMA_TO_DEVICE); - if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) { + spin_unlock_irq(&tq->tx_lock); return -EFAULT; + } tbi->map_type |= VMXNET3_MAP_SINGLE; } else { /* XDP buffer from page pool */ page = virt_to_page(xdpf->data); @@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, dma_wmb(); gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ VMXNET3_TXD_GEN); + spin_unlock_irq(&tq->tx_lock); /* No need to handle the case when tx_num_deferred doesn't reach * threshold. Backend driver at hypervisor side will poll and reset @@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev, { struct vmxnet3_adapter *adapter = netdev_priv(dev); struct vmxnet3_tx_queue *tq; + struct netdev_queue *nq; int i; if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))) @@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev, if (tq->stopped) return -ENETDOWN; + nq = netdev_get_tx_queue(adapter->netdev, tq->qid); + + __netif_tx_lock(nq, smp_processor_id()); for (i = 0; i < n; i++) { if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) { tq->stats.xdp_xmit_err++; @@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev, } } tq->stats.xdp_xmit += i; + __netif_tx_unlock(nq); return i; } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 05c10acb2a57..92516189e792 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2898,8 +2898,11 @@ static int vxlan_init(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); int err; - if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) - vxlan_vnigroup_init(vxlan); + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) { + err = vxlan_vnigroup_init(vxlan); + if (err) + return err; + } err = gro_cells_init(&vxlan->gro_cells, dev); if (err) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 4dd6cdf84571..abb510d235a5 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -4851,6 +4851,22 @@ static struct ath12k_reg_rule return reg_rule_ptr; } +static u8 ath12k_wmi_ignore_num_extra_rules(struct ath12k_wmi_reg_rule_ext_params *rule, + u32 num_reg_rules) +{ + u8 num_invalid_5ghz_rules = 0; + u32 count, start_freq; + + for (count = 0; count < num_reg_rules; count++) { + start_freq = le32_get_bits(rule[count].freq_info, REG_RULE_START_FREQ); + + if (start_freq >= ATH12K_MIN_6G_FREQ) + num_invalid_5ghz_rules++; + } + + return num_invalid_5ghz_rules; +} + static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, struct sk_buff *skb, struct ath12k_reg_info *reg_info) @@ -4861,6 +4877,7 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, u32 num_2g_reg_rules, num_5g_reg_rules; u32 num_6g_reg_rules_ap[WMI_REG_CURRENT_MAX_AP_TYPE]; u32 num_6g_reg_rules_cl[WMI_REG_CURRENT_MAX_AP_TYPE][WMI_REG_MAX_CLIENT_TYPE]; + u8 num_invalid_5ghz_ext_rules; u32 total_reg_rules = 0; int ret, i, j; @@ -4954,20 +4971,6 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, memcpy(reg_info->alpha2, &ev->alpha2, REG_ALPHA2_LEN); - /* FIXME: Currently FW includes 6G reg rule also in 5G rule - * list for country US. - * Having same 6G reg rule in 5G and 6G rules list causes - * intersect check to be true, and same rules will be shown - * multiple times in iw cmd. So added hack below to avoid - * parsing 6G rule from 5G reg rule list, and this can be - * removed later, after FW updates to remove 6G reg rule - * from 5G rules list. - */ - if (memcmp(reg_info->alpha2, "US", 2) == 0) { - reg_info->num_5g_reg_rules = REG_US_5G_NUM_REG_RULES; - num_5g_reg_rules = reg_info->num_5g_reg_rules; - } - reg_info->dfs_region = le32_to_cpu(ev->dfs_region); reg_info->phybitmap = le32_to_cpu(ev->phybitmap); reg_info->num_phy = le32_to_cpu(ev->num_phy); @@ -5070,8 +5073,29 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, } } + ext_wmi_reg_rule += num_2g_reg_rules; + + /* Firmware might include 6 GHz reg rule in 5 GHz rule list + * for few countries along with separate 6 GHz rule. + * Having same 6 GHz reg rule in 5 GHz and 6 GHz rules list + * causes intersect check to be true, and same rules will be + * shown multiple times in iw cmd. + * Hence, avoid parsing 6 GHz rule from 5 GHz reg rule list + */ + num_invalid_5ghz_ext_rules = ath12k_wmi_ignore_num_extra_rules(ext_wmi_reg_rule, + num_5g_reg_rules); + + if (num_invalid_5ghz_ext_rules) { + ath12k_dbg(ab, ATH12K_DBG_WMI, + "CC: %s 5 GHz reg rules number %d from fw, %d number of invalid 5 GHz rules", + reg_info->alpha2, reg_info->num_5g_reg_rules, + num_invalid_5ghz_ext_rules); + + num_5g_reg_rules = num_5g_reg_rules - num_invalid_5ghz_ext_rules; + reg_info->num_5g_reg_rules = num_5g_reg_rules; + } + if (num_5g_reg_rules) { - ext_wmi_reg_rule += num_2g_reg_rules; reg_info->reg_rules_5g_ptr = create_ext_reg_rules_from_wmi(num_5g_reg_rules, ext_wmi_reg_rule); @@ -5083,7 +5107,12 @@ static int ath12k_pull_reg_chan_list_ext_update_ev(struct ath12k_base *ab, } } - ext_wmi_reg_rule += num_5g_reg_rules; + /* We have adjusted the number of 5 GHz reg rules above. But still those + * many rules needs to be adjusted in ext_wmi_reg_rule. + * + * NOTE: num_invalid_5ghz_ext_rules will be 0 for rest other cases. + */ + ext_wmi_reg_rule += (num_5g_reg_rules + num_invalid_5ghz_ext_rules); for (i = 0; i < WMI_REG_CURRENT_MAX_AP_TYPE; i++) { reg_info->reg_rules_6g_ap_ptr[i] = diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index b6a197389277..45fe699ce8a5 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -4073,7 +4073,6 @@ struct ath12k_wmi_eht_rate_set_params { #define MAX_REG_RULES 10 #define REG_ALPHA2_LEN 2 #define MAX_6G_REG_RULES 5 -#define REG_US_5G_NUM_REG_RULES 4 enum wmi_start_event_param { WMI_VDEV_START_RESP_EVENT = 0, diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index e4395b1f8c11..d2caa80e9412 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -2712,7 +2712,7 @@ static const struct pci_device_id brcmf_pcie_devid_table[] = { BRCMF_PCIE_DEVICE(BRCM_PCIE_4350_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE_SUB(0x4355, BRCM_PCIE_VENDOR_ID_BROADCOM, 0x4355, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_4354_RAW_DEVICE_ID, WCC), - BRCMF_PCIE_DEVICE(BRCM_PCIE_4355_DEVICE_ID, WCC), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4355_DEVICE_ID, WCC_SEED), BRCMF_PCIE_DEVICE(BRCM_PCIE_4356_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_43567_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_43570_DEVICE_ID, WCC), @@ -2723,7 +2723,7 @@ static const struct pci_device_id brcmf_pcie_devid_table[] = { BRCMF_PCIE_DEVICE(BRCM_PCIE_43602_2G_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_43602_5G_DEVICE_ID, WCC), BRCMF_PCIE_DEVICE(BRCM_PCIE_43602_RAW_DEVICE_ID, WCC), - BRCMF_PCIE_DEVICE(BRCM_PCIE_4364_DEVICE_ID, WCC), + BRCMF_PCIE_DEVICE(BRCM_PCIE_4364_DEVICE_ID, WCC_SEED), BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_DEVICE_ID, BCA), BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_2G_DEVICE_ID, BCA), BRCMF_PCIE_DEVICE(BRCM_PCIE_4365_5G_DEVICE_ID, BCA), diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c index d5a9360323d2..8755c5e6a65b 100644 --- a/drivers/net/wwan/mhi_wwan_mbim.c +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -220,7 +220,7 @@ static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *s if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) && (mbim->rx_seq || le16_to_cpu(nth16->wSequence)) && !(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) { - net_err_ratelimited("sequence number glitch prev=%d curr=%d\n", + net_dbg_ratelimited("sequence number glitch prev=%d curr=%d\n", mbim->rx_seq, le16_to_cpu(nth16->wSequence)); } mbim->rx_seq = le16_to_cpu(nth16->wSequence); diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 1de11b722f04..a060f69558e7 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1011,25 +1011,37 @@ static void apple_nvme_reset_work(struct work_struct *work) ret = apple_rtkit_shutdown(anv->rtk); if (ret) goto out; + + writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); } - writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + /* + * Only do the soft-reset if the CPU is not running, which means either we + * or the previous stage shut it down cleanly. + */ + if (!(readl(anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL) & + APPLE_ANS_COPROC_CPU_CONTROL_RUN)) { - ret = reset_control_assert(anv->reset); - if (ret) - goto out; + ret = reset_control_assert(anv->reset); + if (ret) + goto out; - ret = apple_rtkit_reinit(anv->rtk); - if (ret) - goto out; + ret = apple_rtkit_reinit(anv->rtk); + if (ret) + goto out; - ret = reset_control_deassert(anv->reset); - if (ret) - goto out; + ret = reset_control_deassert(anv->reset); + if (ret) + goto out; + + writel(APPLE_ANS_COPROC_CPU_CONTROL_RUN, + anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + + ret = apple_rtkit_boot(anv->rtk); + } else { + ret = apple_rtkit_wake(anv->rtk); + } - writel(APPLE_ANS_COPROC_CPU_CONTROL_RUN, - anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); - ret = apple_rtkit_boot(anv->rtk); if (ret) { dev_err(anv->dev, "ANS did not boot"); goto out; @@ -1516,6 +1528,7 @@ static struct apple_nvme *apple_nvme_alloc(struct platform_device *pdev) return anv; put_dev: + apple_nvme_detach_genpd(anv); put_device(anv->dev); return ERR_PTR(ret); } @@ -1549,6 +1562,7 @@ out_uninit_ctrl: nvme_uninit_ctrl(&anv->ctrl); out_put_ctrl: nvme_put_ctrl(&anv->ctrl); + apple_nvme_detach_genpd(anv); return ret; } @@ -1563,9 +1577,12 @@ static void apple_nvme_remove(struct platform_device *pdev) apple_nvme_disable(anv, true); nvme_uninit_ctrl(&anv->ctrl); - if (apple_rtkit_is_running(anv->rtk)) + if (apple_rtkit_is_running(anv->rtk)) { apple_rtkit_shutdown(anv->rtk); + writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + } + apple_nvme_detach_genpd(anv); } @@ -1574,8 +1591,11 @@ static void apple_nvme_shutdown(struct platform_device *pdev) struct apple_nvme *anv = platform_get_drvdata(pdev); apple_nvme_disable(anv, true); - if (apple_rtkit_is_running(anv->rtk)) + if (apple_rtkit_is_running(anv->rtk)) { apple_rtkit_shutdown(anv->rtk); + + writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + } } static int apple_nvme_resume(struct device *dev) @@ -1592,10 +1612,11 @@ static int apple_nvme_suspend(struct device *dev) apple_nvme_disable(anv, true); - if (apple_rtkit_is_running(anv->rtk)) + if (apple_rtkit_is_running(anv->rtk)) { ret = apple_rtkit_shutdown(anv->rtk); - writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + writel(0, anv->mmio_coproc + APPLE_ANS_COPROC_CPU_CONTROL); + } return ret; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 40046770f1bf..f028913e2e62 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -564,8 +564,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, switch (new_state) { case NVME_CTRL_LIVE: switch (old_state) { - case NVME_CTRL_NEW: - case NVME_CTRL_RESETTING: case NVME_CTRL_CONNECTING: changed = true; fallthrough; @@ -1700,7 +1698,13 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count) status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0, &result); - if (status < 0) + + /* + * It's either a kernel error or the host observed a connection + * lost. In either case it's not possible communicate with the + * controller and thus enter the error code path. + */ + if (status < 0 || status == NVME_SC_HOST_PATH_ERROR) return status; /* diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 094be164ffdc..b9929a5a7f4e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -785,49 +785,8 @@ nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) "NVME-FC{%d}: controller connectivity lost. Awaiting " "Reconnect", ctrl->cnum); - switch (nvme_ctrl_state(&ctrl->ctrl)) { - case NVME_CTRL_NEW: - case NVME_CTRL_LIVE: - /* - * Schedule a controller reset. The reset will terminate the - * association and schedule the reconnect timer. Reconnects - * will be attempted until either the ctlr_loss_tmo - * (max_retries * connect_delay) expires or the remoteport's - * dev_loss_tmo expires. - */ - if (nvme_reset_ctrl(&ctrl->ctrl)) { - dev_warn(ctrl->ctrl.device, - "NVME-FC{%d}: Couldn't schedule reset.\n", - ctrl->cnum); - nvme_delete_ctrl(&ctrl->ctrl); - } - break; - - case NVME_CTRL_CONNECTING: - /* - * The association has already been terminated and the - * controller is attempting reconnects. No need to do anything - * futher. Reconnects will be attempted until either the - * ctlr_loss_tmo (max_retries * connect_delay) expires or the - * remoteport's dev_loss_tmo expires. - */ - break; - - case NVME_CTRL_RESETTING: - /* - * Controller is already in the process of terminating the - * association. No need to do anything further. The reconnect - * step will kick in naturally after the association is - * terminated. - */ - break; - - case NVME_CTRL_DELETING: - case NVME_CTRL_DELETING_NOIO: - default: - /* no action to take - let it delete */ - break; - } + set_bit(ASSOC_FAILED, &ctrl->flags); + nvme_reset_ctrl(&ctrl->ctrl); } /** @@ -2079,7 +2038,8 @@ done: nvme_fc_complete_rq(rq); check_error: - if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING) + if (terminate_assoc && + nvme_ctrl_state(&ctrl->ctrl) != NVME_CTRL_RESETTING) queue_work(nvme_reset_wq, &ctrl->ioerr_work); } @@ -2533,6 +2493,8 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) { + enum nvme_ctrl_state state = nvme_ctrl_state(&ctrl->ctrl); + /* * if an error (io timeout, etc) while (re)connecting, the remote * port requested terminating of the association (disconnect_ls) @@ -2540,9 +2502,8 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) * the controller. Abort any ios on the association and let the * create_association error path resolve things. */ - if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { + if (state == NVME_CTRL_CONNECTING) { __nvme_fc_abort_outstanding_ios(ctrl, true); - set_bit(ASSOC_FAILED, &ctrl->flags); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport error during (re)connect\n", ctrl->cnum); @@ -2550,7 +2511,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) } /* Otherwise, only proceed if in LIVE state - e.g. on first error */ - if (ctrl->ctrl.state != NVME_CTRL_LIVE) + if (state != NVME_CTRL_LIVE) return; dev_warn(ctrl->ctrl.device, @@ -3061,7 +3022,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) struct nvmefc_ls_rcv_op *disls = NULL; unsigned long flags; int ret; - bool changed; ++ctrl->ctrl.nr_reconnects; @@ -3172,12 +3132,13 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (ret) goto out_term_aen_ops; - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE)) { + ret = -EIO; + goto out_term_aen_ops; + } ctrl->ctrl.nr_reconnects = 0; - - if (changed) - nvme_start_ctrl(&ctrl->ctrl); + nvme_start_ctrl(&ctrl->ctrl); return 0; /* Success */ @@ -3578,8 +3539,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) || - !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { dev_err(ctrl->ctrl.device, "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); goto fail_ctrl; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index e8930146847a..b1b46c2713e1 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -283,8 +283,7 @@ static bool nvme_validate_passthru_nsid(struct nvme_ctrl *ctrl, { if (ns && nsid != ns->head->ns_id) { dev_err(ctrl->device, - "%s: nsid (%u) in cmd does not match nsid (%u)" - "of namespace\n", + "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", current->comm, nsid, ns->head->ns_id); return false; } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 278bed4e35bb..950289405ef2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2153,14 +2153,6 @@ static int nvme_alloc_host_mem_multi(struct nvme_dev *dev, u64 preferred, return 0; out_free_bufs: - while (--i >= 0) { - size_t size = le32_to_cpu(descs[i].size) * NVME_CTRL_PAGE_SIZE; - - dma_free_attrs(dev->dev, size, bufs[i], - le64_to_cpu(descs[i].addr), - DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); - } - kfree(bufs); out_free_descs: dma_free_coherent(dev->dev, descs_size, descs, descs_dma); @@ -3147,7 +3139,9 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) * because of high power consumption (> 2 Watt) in s2idle * sleep. Only some boards with Intel CPU are affected. */ - if (dmi_match(DMI_BOARD_NAME, "GMxPXxx") || + if (dmi_match(DMI_BOARD_NAME, "DN50Z-140HC-YD") || + dmi_match(DMI_BOARD_NAME, "GMxPXxx") || + dmi_match(DMI_BOARD_NAME, "GXxMRXx") || dmi_match(DMI_BOARD_NAME, "PH4PG31") || dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1") || dmi_match(DMI_BOARD_NAME, "PH6PG01_PH6PG71")) @@ -3712,6 +3706,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1dbe, 0x5216), /* Acer/INNOGRIT FA100/5216 NVMe SSD */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1dbe, 0x5236), /* ADATA XPG GAMMIX S70 */ .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1e49, 0x0021), /* ZHITAI TiPro5000 NVMe SSD */ diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index b68a9e5f1ea3..3a41b9ab0f13 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -792,7 +792,7 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj, return a->mode; } -const struct attribute_group nvme_tls_attrs_group = { +static const struct attribute_group nvme_tls_attrs_group = { .attrs = nvme_tls_attrs, .is_visible = nvme_tls_attrs_are_visible, }; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 841238f38fdd..8a9131c95a3d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -763,6 +763,40 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, return 0; } +static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue, + struct nvme_tcp_term_pdu *pdu) +{ + u16 fes; + const char *msg; + u32 plen = le32_to_cpu(pdu->hdr.plen); + + static const char * const msg_table[] = { + [NVME_TCP_FES_INVALID_PDU_HDR] = "Invalid PDU Header Field", + [NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error", + [NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error", + [NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range", + [NVME_TCP_FES_R2T_LIMIT_EXCEEDED] = "R2T Limit Exceeded", + [NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter", + }; + + if (plen < NVME_TCP_MIN_C2HTERM_PLEN || + plen > NVME_TCP_MAX_C2HTERM_PLEN) { + dev_err(queue->ctrl->ctrl.device, + "Received a malformed C2HTermReq PDU (plen = %u)\n", + plen); + return; + } + + fes = le16_to_cpu(pdu->fes); + if (fes && fes < ARRAY_SIZE(msg_table)) + msg = msg_table[fes]; + else + msg = "Unknown"; + + dev_err(queue->ctrl->ctrl.device, + "Received C2HTermReq (FES = %s)\n", msg); +} + static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, unsigned int *offset, size_t *len) { @@ -784,6 +818,15 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, return 0; hdr = queue->pdu; + if (unlikely(hdr->type == nvme_tcp_c2h_term)) { + /* + * C2HTermReq never includes Header or Data digests. + * Skip the checks. + */ + nvme_tcp_handle_c2h_term(queue, (void *)queue->pdu); + return -EINVAL; + } + if (queue->hdr_digest) { ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen); if (unlikely(ret)) @@ -1449,11 +1492,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) msg.msg_control = cbuf; msg.msg_controllen = sizeof(cbuf); } + msg.msg_flags = MSG_WAITALL; ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); - if (ret < 0) { + if (ret < sizeof(*icresp)) { pr_warn("queue %d: failed to receive icresp, error %d\n", nvme_tcp_queue_id(queue), ret); + if (ret >= 0) + ret = -ECONNRESET; goto free_icresp; } ret = -ENOTCONN; @@ -1565,7 +1611,7 @@ static bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) ctrl->io_queues[HCTX_TYPE_POLL]; } -/** +/* * Track the number of queues assigned to each cpu using a global per-cpu * counter and select the least used cpu from the mq_map. Our goal is to spread * different controllers I/O threads across different cpu cores. diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index e670dc185a96..acc138bbf8f2 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -1068,6 +1068,7 @@ static void nvme_execute_identify_ns_nvm(struct nvmet_req *req) goto out; } status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); + kfree(id); out: nvmet_req_complete(req, status); } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cdc4a09a6e8a..2e741696f371 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -606,6 +606,9 @@ int nvmet_ns_enable(struct nvmet_ns *ns) goto out_dev_put; } + if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL)) + goto out_pr_exit; + nvmet_ns_changed(subsys, ns->nsid); ns->enabled = true; xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED); @@ -613,6 +616,9 @@ int nvmet_ns_enable(struct nvmet_ns *ns) out_unlock: mutex_unlock(&subsys->lock); return ret; +out_pr_exit: + if (ns->pr.enable) + nvmet_pr_exit_ns(ns); out_dev_put: list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); @@ -638,6 +644,19 @@ void nvmet_ns_disable(struct nvmet_ns *ns) mutex_unlock(&subsys->lock); + /* + * Now that we removed the namespaces from the lookup list, we + * can kill the per_cpu ref and wait for any remaining references + * to be dropped, as well as a RCU grace period for anyone only + * using the namepace under rcu_read_lock(). Note that we can't + * use call_rcu here as we need to ensure the namespaces have + * been fully destroyed before unloading the module. + */ + percpu_ref_kill(&ns->ref); + synchronize_rcu(); + wait_for_completion(&ns->disable_done); + percpu_ref_exit(&ns->ref); + if (ns->pr.enable) nvmet_pr_exit_ns(ns); @@ -660,22 +679,6 @@ void nvmet_ns_free(struct nvmet_ns *ns) if (ns->nsid == subsys->max_nsid) subsys->max_nsid = nvmet_max_nsid(subsys); - mutex_unlock(&subsys->lock); - - /* - * Now that we removed the namespaces from the lookup list, we - * can kill the per_cpu ref and wait for any remaining references - * to be dropped, as well as a RCU grace period for anyone only - * using the namepace under rcu_read_lock(). Note that we can't - * use call_rcu here as we need to ensure the namespaces have - * been fully destroyed before unloading the module. - */ - percpu_ref_kill(&ns->ref); - synchronize_rcu(); - wait_for_completion(&ns->disable_done); - percpu_ref_exit(&ns->ref); - - mutex_lock(&subsys->lock); subsys->nr_namespaces--; mutex_unlock(&subsys->lock); @@ -705,9 +708,6 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) ns->nsid = nsid; ns->subsys = subsys; - if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL)) - goto out_free; - if (ns->nsid > subsys->max_nsid) subsys->max_nsid = nsid; @@ -730,8 +730,6 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) return ns; out_exit: subsys->max_nsid = nvmet_max_nsid(subsys); - percpu_ref_exit(&ns->ref); -out_free: kfree(ns); out_unlock: mutex_unlock(&subsys->lock); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index a7ff05b3be29..eb406c90c167 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -287,7 +287,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) args.subsysnqn = d->subsysnqn; args.hostnqn = d->hostnqn; args.hostid = &d->hostid; - args.kato = c->kato; + args.kato = le32_to_cpu(c->kato); ctrl = nvmet_alloc_ctrl(&args); if (!ctrl) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index c1f574fe3280..83be0657e6df 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -272,7 +272,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) iter_flags = SG_MITER_FROM_SG; } - if (req->cmd->rw.control & NVME_RW_LR) + if (req->cmd->rw.control & cpu_to_le16(NVME_RW_LR)) opf |= REQ_FAILFAST_DEV; if (is_pci_p2pdma_page(sg_page(req->sg))) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index b540216c0c9a..d2c1233981e1 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -589,7 +589,7 @@ struct nvmet_alloc_ctrl_args { const struct nvmet_fabrics_ops *ops; struct device *p2p_client; u32 kato; - u32 result; + __le32 result; u16 error_loc; u16 status; }; @@ -784,37 +784,37 @@ u16 nvmet_report_invalid_opcode(struct nvmet_req *req); static inline bool nvmet_cc_en(u32 cc) { - return (cc >> NVME_CC_EN_SHIFT) & 0x1; + return (cc & NVME_CC_ENABLE) >> NVME_CC_EN_SHIFT; } static inline u8 nvmet_cc_css(u32 cc) { - return (cc >> NVME_CC_CSS_SHIFT) & 0x7; + return (cc & NVME_CC_CSS_MASK) >> NVME_CC_CSS_SHIFT; } static inline u8 nvmet_cc_mps(u32 cc) { - return (cc >> NVME_CC_MPS_SHIFT) & 0xf; + return (cc & NVME_CC_MPS_MASK) >> NVME_CC_MPS_SHIFT; } static inline u8 nvmet_cc_ams(u32 cc) { - return (cc >> NVME_CC_AMS_SHIFT) & 0x7; + return (cc & NVME_CC_AMS_MASK) >> NVME_CC_AMS_SHIFT; } static inline u8 nvmet_cc_shn(u32 cc) { - return (cc >> NVME_CC_SHN_SHIFT) & 0x3; + return (cc & NVME_CC_SHN_MASK) >> NVME_CC_SHN_SHIFT; } static inline u8 nvmet_cc_iosqes(u32 cc) { - return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf; + return (cc & NVME_CC_IOSQES_MASK) >> NVME_CC_IOSQES_SHIFT; } static inline u8 nvmet_cc_iocqes(u32 cc) { - return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf; + return (cc & NVME_CC_IOCQES_MASK) >> NVME_CC_IOCQES_SHIFT; } /* Convert a 32-bit number to a 16-bit 0's based number */ diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index ac30b42cc622..565d2bd36dcd 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -46,7 +46,7 @@ static DEFINE_MUTEX(nvmet_pci_epf_ports_mutex); /* * BAR CC register and SQ polling intervals. */ -#define NVMET_PCI_EPF_CC_POLL_INTERVAL msecs_to_jiffies(5) +#define NVMET_PCI_EPF_CC_POLL_INTERVAL msecs_to_jiffies(10) #define NVMET_PCI_EPF_SQ_POLL_INTERVAL msecs_to_jiffies(5) #define NVMET_PCI_EPF_SQ_POLL_IDLE msecs_to_jiffies(5000) @@ -1694,6 +1694,7 @@ static void nvmet_pci_epf_poll_sqs_work(struct work_struct *work) struct nvmet_pci_epf_ctrl *ctrl = container_of(work, struct nvmet_pci_epf_ctrl, poll_sqs.work); struct nvmet_pci_epf_queue *sq; + unsigned long limit = jiffies; unsigned long last = 0; int i, nr_sqs; @@ -1708,6 +1709,16 @@ static void nvmet_pci_epf_poll_sqs_work(struct work_struct *work) nr_sqs++; } + /* + * If we have been running for a while, reschedule to let other + * tasks run and to avoid RCU stalls. + */ + if (time_is_before_jiffies(limit + secs_to_jiffies(1))) { + cond_resched(); + limit = jiffies; + continue; + } + if (nr_sqs) { last = jiffies; continue; @@ -1822,14 +1833,14 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) if (ctrl->io_sqes < sizeof(struct nvme_command)) { dev_err(ctrl->dev, "Unsupported I/O SQES %zu (need %zu)\n", ctrl->io_sqes, sizeof(struct nvme_command)); - return -EINVAL; + goto err; } ctrl->io_cqes = 1UL << nvmet_cc_iocqes(ctrl->cc); if (ctrl->io_cqes < sizeof(struct nvme_completion)) { dev_err(ctrl->dev, "Unsupported I/O CQES %zu (need %zu)\n", ctrl->io_sqes, sizeof(struct nvme_completion)); - return -EINVAL; + goto err; } /* Create the admin queue. */ @@ -1844,7 +1855,7 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) qsize, pci_addr, 0); if (status != NVME_SC_SUCCESS) { dev_err(ctrl->dev, "Failed to create admin completion queue\n"); - return -EINVAL; + goto err; } qsize = aqa & 0x00000fff; @@ -1854,17 +1865,22 @@ static int nvmet_pci_epf_enable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) if (status != NVME_SC_SUCCESS) { dev_err(ctrl->dev, "Failed to create admin submission queue\n"); nvmet_pci_epf_delete_cq(ctrl->tctrl, 0); - return -EINVAL; + goto err; } ctrl->sq_ab = NVMET_PCI_EPF_SQ_AB; ctrl->irq_vector_threshold = NVMET_PCI_EPF_IV_THRESHOLD; ctrl->enabled = true; + ctrl->csts = NVME_CSTS_RDY; /* Start polling the controller SQs. */ schedule_delayed_work(&ctrl->poll_sqs, 0); return 0; + +err: + ctrl->csts = 0; + return -EINVAL; } static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) @@ -1889,6 +1905,8 @@ static void nvmet_pci_epf_disable_ctrl(struct nvmet_pci_epf_ctrl *ctrl) /* Delete the admin queue last. */ nvmet_pci_epf_delete_sq(ctrl->tctrl, 0); nvmet_pci_epf_delete_cq(ctrl->tctrl, 0); + + ctrl->csts &= ~NVME_CSTS_RDY; } static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) @@ -1903,19 +1921,19 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) old_cc = ctrl->cc; new_cc = nvmet_pci_epf_bar_read32(ctrl, NVME_REG_CC); + if (new_cc == old_cc) + goto reschedule_work; + ctrl->cc = new_cc; if (nvmet_cc_en(new_cc) && !nvmet_cc_en(old_cc)) { ret = nvmet_pci_epf_enable_ctrl(ctrl); if (ret) - return; - ctrl->csts |= NVME_CSTS_RDY; + goto reschedule_work; } - if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc)) { + if (!nvmet_cc_en(new_cc) && nvmet_cc_en(old_cc)) nvmet_pci_epf_disable_ctrl(ctrl); - ctrl->csts &= ~NVME_CSTS_RDY; - } if (nvmet_cc_shn(new_cc) && !nvmet_cc_shn(old_cc)) { nvmet_pci_epf_disable_ctrl(ctrl); @@ -1928,6 +1946,7 @@ static void nvmet_pci_epf_poll_cc_work(struct work_struct *work) nvmet_update_cc(ctrl->tctrl, ctrl->cc); nvmet_pci_epf_bar_write32(ctrl, NVME_REG_CSTS, ctrl->csts); +reschedule_work: schedule_delayed_work(&ctrl->poll_cc, NVMET_PCI_EPF_CC_POLL_INTERVAL); } diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 1afd93026f9b..2a4536ef6184 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -996,6 +996,27 @@ out_err: nvmet_req_complete(&cmd->req, status); } +static bool nvmet_rdma_recv_not_live(struct nvmet_rdma_queue *queue, + struct nvmet_rdma_rsp *rsp) +{ + unsigned long flags; + bool ret = true; + + spin_lock_irqsave(&queue->state_lock, flags); + /* + * recheck queue state is not live to prevent a race condition + * with RDMA_CM_EVENT_ESTABLISHED handler. + */ + if (queue->state == NVMET_RDMA_Q_LIVE) + ret = false; + else if (queue->state == NVMET_RDMA_Q_CONNECTING) + list_add_tail(&rsp->wait_list, &queue->rsp_wait_list); + else + nvmet_rdma_put_rsp(rsp); + spin_unlock_irqrestore(&queue->state_lock, flags); + return ret; +} + static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvmet_rdma_cmd *cmd = @@ -1038,17 +1059,9 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) rsp->n_rdma = 0; rsp->invalidate_rkey = 0; - if (unlikely(queue->state != NVMET_RDMA_Q_LIVE)) { - unsigned long flags; - - spin_lock_irqsave(&queue->state_lock, flags); - if (queue->state == NVMET_RDMA_Q_CONNECTING) - list_add_tail(&rsp->wait_list, &queue->rsp_wait_list); - else - nvmet_rdma_put_rsp(rsp); - spin_unlock_irqrestore(&queue->state_lock, flags); + if (unlikely(queue->state != NVMET_RDMA_Q_LIVE) && + nvmet_rdma_recv_not_live(queue, rsp)) return; - } nvmet_rdma_handle_command(queue, rsp); } diff --git a/drivers/of/address.c b/drivers/of/address.c index 125833e5ce52..d177a2b9edaf 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -16,6 +16,8 @@ #include <linux/string.h> #include <linux/dma-direct.h> /* for bus_dma_region */ +#include <kunit/visibility.h> + /* Uncomment me to enable of_dump_addr() debugging output */ // #define DEBUG @@ -183,7 +185,7 @@ static u64 of_bus_pci_map(__be32 *addr, const __be32 *range, int na, int ns, #endif /* CONFIG_PCI */ -static int __of_address_resource_bounds(struct resource *r, u64 start, u64 size) +VISIBLE_IF_KUNIT int __of_address_resource_bounds(struct resource *r, u64 start, u64 size) { if (overflows_type(start, r->start)) return -EOVERFLOW; @@ -197,6 +199,7 @@ static int __of_address_resource_bounds(struct resource *r, u64 start, u64 size) return 0; } +EXPORT_SYMBOL_IF_KUNIT(__of_address_resource_bounds); /* * of_pci_range_to_resource - Create a resource from an of_pci_range diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index f3e1193c8ded..1bdc7ceef3c5 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -208,4 +208,8 @@ static void __maybe_unused of_dump_addr(const char *s, const __be32 *addr, int n static void __maybe_unused of_dump_addr(const char *s, const __be32 *addr, int na) { } #endif +#if IS_ENABLED(CONFIG_KUNIT) +int __of_address_resource_bounds(struct resource *r, u64 start, u64 size); +#endif + #endif /* _LINUX_OF_PRIVATE_H */ diff --git a/drivers/of/of_test.c b/drivers/of/of_test.c index b0557ded838f..8bba5a72c9c7 100644 --- a/drivers/of/of_test.c +++ b/drivers/of/of_test.c @@ -2,6 +2,7 @@ /* * KUnit tests for OF APIs */ +#include <linux/ioport.h> #include <linux/module.h> #include <linux/of.h> @@ -54,8 +55,124 @@ static struct kunit_suite of_dtb_suite = { .init = of_dtb_test_init, }; +struct of_address_resource_bounds_case { + u64 start; + u64 size; + int ret; + + u64 res_start; + u64 res_end; +}; + +static void of_address_resource_bounds_case_desc(const struct of_address_resource_bounds_case *p, + char *name) +{ + snprintf(name, KUNIT_PARAM_DESC_SIZE, "start=0x%016llx,size=0x%016llx", p->start, p->size); +} + +static const struct of_address_resource_bounds_case of_address_resource_bounds_cases[] = { + { + .start = 0, + .size = 0, + .ret = 0, + .res_start = 0, + .res_end = -1, + }, + { + .start = 0, + .size = 0x1000, + .ret = 0, + .res_start = 0, + .res_end = 0xfff, + }, + { + .start = 0x1000, + .size = 0, + .ret = 0, + .res_start = 0x1000, + .res_end = 0xfff, + }, + { + .start = 0x1000, + .size = 0x1000, + .ret = 0, + .res_start = 0x1000, + .res_end = 0x1fff, + }, + { + .start = 1, + .size = RESOURCE_SIZE_MAX, + .ret = 0, + .res_start = 1, + .res_end = RESOURCE_SIZE_MAX, + }, + { + .start = RESOURCE_SIZE_MAX, + .size = 1, + .ret = 0, + .res_start = RESOURCE_SIZE_MAX, + .res_end = RESOURCE_SIZE_MAX, + }, + { + .start = 2, + .size = RESOURCE_SIZE_MAX, + .ret = -EOVERFLOW, + }, + { + .start = RESOURCE_SIZE_MAX, + .size = 2, + .ret = -EOVERFLOW, + }, + { + .start = ULL(0x100000000), + .size = 1, + .ret = sizeof(resource_size_t) > sizeof(u32) ? 0 : -EOVERFLOW, + .res_start = ULL(0x100000000), + .res_end = ULL(0x100000000), + }, + { + .start = 0x1000, + .size = 0xffffffff, + .ret = sizeof(resource_size_t) > sizeof(u32) ? 0 : -EOVERFLOW, + .res_start = 0x1000, + .res_end = ULL(0x100000ffe), + }, +}; + +KUNIT_ARRAY_PARAM(of_address_resource_bounds, + of_address_resource_bounds_cases, of_address_resource_bounds_case_desc); + +static void of_address_resource_bounds(struct kunit *test) +{ + const struct of_address_resource_bounds_case *param = test->param_value; + struct resource r; /* Intentionally uninitialized */ + int ret; + + if (!IS_ENABLED(CONFIG_OF_ADDRESS)) + kunit_skip(test, "CONFIG_OF_ADDRESS not enabled\n"); + + ret = __of_address_resource_bounds(&r, param->start, param->size); + KUNIT_EXPECT_EQ(test, param->ret, ret); + if (ret == 0) { + KUNIT_EXPECT_EQ(test, (resource_size_t)param->res_start, r.start); + KUNIT_EXPECT_EQ(test, (resource_size_t)param->res_end, r.end); + KUNIT_EXPECT_EQ(test, param->size, resource_size(&r)); + } +} + +static struct kunit_case of_address_test_cases[] = { + KUNIT_CASE_PARAM(of_address_resource_bounds, of_address_resource_bounds_gen_params), + {} +}; + +static struct kunit_suite of_address_suite = { + .name = "of_address", + .test_cases = of_address_test_cases, +}; + kunit_test_suites( - &of_dtb_suite, + &of_dtb_suite, &of_address_suite, ); MODULE_DESCRIPTION("KUnit tests for OF APIs"); +MODULE_IMPORT_NS("EXPORTED_FOR_KUNIT_TESTING"); MODULE_LICENSE("GPL"); diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index e0bc90597dca..da3e7edcf49d 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -108,9 +108,6 @@ void pci_save_aspm_l1ss_state(struct pci_dev *pdev) pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL2, cap++); pci_read_config_dword(pdev, pdev->l1ss + PCI_L1SS_CTL1, cap++); - if (parent->state_saved) - return; - /* * Save parent's L1 substate configuration so we have it for * pci_restore_aspm_l1ss_state(pdev) to restore. diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b6536ed599c3..246744d8d268 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -339,13 +339,14 @@ out: return (res->flags & IORESOURCE_MEM_64) ? 1 : 0; } -static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) +static __always_inline void pci_read_bases(struct pci_dev *dev, + unsigned int howmany, int rom) { u32 rombar, stdbars[PCI_STD_NUM_BARS]; unsigned int pos, reg; u16 orig_cmd; - BUILD_BUG_ON(howmany > PCI_STD_NUM_BARS); + BUILD_BUG_ON(statically_true(howmany > PCI_STD_NUM_BARS)); if (dev->non_compliant_bars) return; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index b84ff7bade82..82b21e34c545 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5522,7 +5522,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x443, quirk_intel_qat_vf_cap); * AMD Matisse USB 3.0 Host Controller 0x149c * Intel 82579LM Gigabit Ethernet Controller 0x1502 * Intel 82579V Gigabit Ethernet Controller 0x1503 - * + * Mediatek MT7922 802.11ax PCI Express Wireless Network Adapter */ static void quirk_no_flr(struct pci_dev *dev) { @@ -5534,6 +5534,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x149c, quirk_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_AMD, 0x7901, quirk_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1502, quirk_no_flr); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1503, quirk_no_flr); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_MEDIATEK, 0x0616, quirk_no_flr); /* FLR may cause the SolidRun SNET DPU (rev 0x1) to hang */ static void quirk_no_flr_snet(struct pci_dev *dev) diff --git a/drivers/pci/tph.c b/drivers/pci/tph.c index 1e604fbbda65..07de59ca2ebf 100644 --- a/drivers/pci/tph.c +++ b/drivers/pci/tph.c @@ -360,7 +360,7 @@ int pcie_tph_set_st_entry(struct pci_dev *pdev, unsigned int index, u16 tag) return err; } - set_ctrl_reg_req_en(pdev, pdev->tph_mode); + set_ctrl_reg_req_en(pdev, pdev->tph_req_type); pci_dbg(pdev, "set steering tag: %s table, index=%d, tag=%#04x\n", (loc == PCI_TPH_LOC_MSIX) ? "MSI-X" : "ST", index, tag); diff --git a/drivers/pinctrl/pinconf-generic.c b/drivers/pinctrl/pinconf-generic.c index 0b13d7f17b32..42547f64453e 100644 --- a/drivers/pinctrl/pinconf-generic.c +++ b/drivers/pinctrl/pinconf-generic.c @@ -89,12 +89,12 @@ static void pinconf_generic_dump_one(struct pinctrl_dev *pctldev, seq_puts(s, items[i].display); /* Print unit if available */ if (items[i].has_arg) { - seq_printf(s, " (0x%x", - pinconf_to_config_argument(config)); + u32 val = pinconf_to_config_argument(config); + if (items[i].format) - seq_printf(s, " %s)", items[i].format); + seq_printf(s, " (%u %s)", val, items[i].format); else - seq_puts(s, ")"); + seq_printf(s, " (0x%x)", val); } } } diff --git a/drivers/pinctrl/pinctrl-cy8c95x0.c b/drivers/pinctrl/pinctrl-cy8c95x0.c index 0d6c2027d4c1..d73004b4a45e 100644 --- a/drivers/pinctrl/pinctrl-cy8c95x0.c +++ b/drivers/pinctrl/pinctrl-cy8c95x0.c @@ -42,7 +42,7 @@ #define CY8C95X0_PORTSEL 0x18 /* Port settings, write PORTSEL first */ #define CY8C95X0_INTMASK 0x19 -#define CY8C95X0_PWMSEL 0x1A +#define CY8C95X0_SELPWM 0x1A #define CY8C95X0_INVERT 0x1B #define CY8C95X0_DIRECTION 0x1C /* Drive mode register change state on writing '1' */ @@ -328,14 +328,14 @@ static int cypress_get_pin_mask(struct cy8c95x0_pinctrl *chip, unsigned int pin) static bool cy8c95x0_readable_register(struct device *dev, unsigned int reg) { /* - * Only 12 registers are present per port (see Table 6 in the - * datasheet). + * Only 12 registers are present per port (see Table 6 in the datasheet). */ - if (reg >= CY8C95X0_VIRTUAL && (reg % MUXED_STRIDE) < 12) - return true; + if (reg >= CY8C95X0_VIRTUAL && (reg % MUXED_STRIDE) >= 12) + return false; switch (reg) { case 0x24 ... 0x27: + case 0x31 ... 0x3f: return false; default: return true; @@ -344,8 +344,11 @@ static bool cy8c95x0_readable_register(struct device *dev, unsigned int reg) static bool cy8c95x0_writeable_register(struct device *dev, unsigned int reg) { - if (reg >= CY8C95X0_VIRTUAL) - return true; + /* + * Only 12 registers are present per port (see Table 6 in the datasheet). + */ + if (reg >= CY8C95X0_VIRTUAL && (reg % MUXED_STRIDE) >= 12) + return false; switch (reg) { case CY8C95X0_INPUT_(0) ... CY8C95X0_INPUT_(7): @@ -353,6 +356,7 @@ static bool cy8c95x0_writeable_register(struct device *dev, unsigned int reg) case CY8C95X0_DEVID: return false; case 0x24 ... 0x27: + case 0x31 ... 0x3f: return false; default: return true; @@ -365,8 +369,8 @@ static bool cy8c95x0_volatile_register(struct device *dev, unsigned int reg) case CY8C95X0_INPUT_(0) ... CY8C95X0_INPUT_(7): case CY8C95X0_INTSTATUS_(0) ... CY8C95X0_INTSTATUS_(7): case CY8C95X0_INTMASK: + case CY8C95X0_SELPWM: case CY8C95X0_INVERT: - case CY8C95X0_PWMSEL: case CY8C95X0_DIRECTION: case CY8C95X0_DRV_PU: case CY8C95X0_DRV_PD: @@ -395,7 +399,7 @@ static bool cy8c95x0_muxed_register(unsigned int reg) { switch (reg) { case CY8C95X0_INTMASK: - case CY8C95X0_PWMSEL: + case CY8C95X0_SELPWM: case CY8C95X0_INVERT: case CY8C95X0_DIRECTION: case CY8C95X0_DRV_PU: @@ -466,7 +470,11 @@ static const struct regmap_config cy8c9520_i2c_regmap = { .max_register = 0, /* Updated at runtime */ .num_reg_defaults_raw = 0, /* Updated at runtime */ .use_single_read = true, /* Workaround for regcache bug */ +#if IS_ENABLED(CONFIG_DEBUG_PINCTRL) + .disable_locking = false, +#else .disable_locking = true, +#endif }; static inline int cy8c95x0_regmap_update_bits_base(struct cy8c95x0_pinctrl *chip, @@ -789,7 +797,7 @@ static int cy8c95x0_gpio_get_pincfg(struct cy8c95x0_pinctrl *chip, reg = CY8C95X0_DIRECTION; break; case PIN_CONFIG_MODE_PWM: - reg = CY8C95X0_PWMSEL; + reg = CY8C95X0_SELPWM; break; case PIN_CONFIG_OUTPUT: reg = CY8C95X0_OUTPUT; @@ -868,7 +876,7 @@ static int cy8c95x0_gpio_set_pincfg(struct cy8c95x0_pinctrl *chip, reg = CY8C95X0_DRV_PP_FAST; break; case PIN_CONFIG_MODE_PWM: - reg = CY8C95X0_PWMSEL; + reg = CY8C95X0_SELPWM; break; case PIN_CONFIG_OUTPUT_ENABLE: return cy8c95x0_pinmux_direction(chip, off, !arg); @@ -1153,7 +1161,7 @@ static void cy8c95x0_pin_dbg_show(struct pinctrl_dev *pctldev, struct seq_file * bitmap_zero(mask, MAX_LINE); __set_bit(pin, mask); - if (cy8c95x0_read_regs_mask(chip, CY8C95X0_PWMSEL, pwm, mask)) { + if (cy8c95x0_read_regs_mask(chip, CY8C95X0_SELPWM, pwm, mask)) { seq_puts(s, "not available"); return; } @@ -1198,7 +1206,7 @@ static int cy8c95x0_set_mode(struct cy8c95x0_pinctrl *chip, unsigned int off, bo u8 port = cypress_get_port(chip, off); u8 bit = cypress_get_pin_mask(chip, off); - return cy8c95x0_regmap_write_bits(chip, CY8C95X0_PWMSEL, port, bit, mode ? bit : 0); + return cy8c95x0_regmap_write_bits(chip, CY8C95X0_SELPWM, port, bit, mode ? bit : 0); } static int cy8c95x0_pinmux_mode(struct cy8c95x0_pinctrl *chip, @@ -1347,7 +1355,7 @@ static int cy8c95x0_irq_setup(struct cy8c95x0_pinctrl *chip, int irq) ret = devm_request_threaded_irq(chip->dev, irq, NULL, cy8c95x0_irq_handler, - IRQF_ONESHOT | IRQF_SHARED | IRQF_TRIGGER_HIGH, + IRQF_ONESHOT | IRQF_SHARED, dev_name(chip->dev), chip); if (ret) { dev_err(chip->dev, "failed to request irq %d\n", irq); @@ -1438,15 +1446,15 @@ static int cy8c95x0_probe(struct i2c_client *client) switch (chip->tpin) { case 20: strscpy(chip->name, cy8c95x0_id[0].name); - regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 3 * MUXED_STRIDE; + regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 3 * MUXED_STRIDE - 1; break; case 40: strscpy(chip->name, cy8c95x0_id[1].name); - regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 6 * MUXED_STRIDE; + regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 6 * MUXED_STRIDE - 1; break; case 60: strscpy(chip->name, cy8c95x0_id[2].name); - regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 8 * MUXED_STRIDE; + regmap_range_conf.range_max = CY8C95X0_VIRTUAL + 8 * MUXED_STRIDE - 1; break; default: return -ENODEV; diff --git a/drivers/platform/cznic/Kconfig b/drivers/platform/cznic/Kconfig index 49c383eb6785..13e37b49d9d0 100644 --- a/drivers/platform/cznic/Kconfig +++ b/drivers/platform/cznic/Kconfig @@ -6,6 +6,7 @@ menuconfig CZNIC_PLATFORMS bool "Platform support for CZ.NIC's Turris hardware" + depends on ARCH_MVEBU || COMPILE_TEST help Say Y here to be able to choose driver support for CZ.NIC's Turris devices. This option alone does not add any kernel code. diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index dfb5d4b8c046..30bd366d7b58 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1121,7 +1121,7 @@ static int ideapad_dytc_profile_init(struct ideapad_private *priv) /* Create platform_profile structure and register */ priv->dytc->ppdev = devm_platform_profile_register(&priv->platform_device->dev, - "ideapad-laptop", &priv->dytc, + "ideapad-laptop", priv->dytc, &dytc_profile_ops); if (IS_ERR(priv->dytc->ppdev)) { err = PTR_ERR(priv->dytc->ppdev); diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h index 5c3c0dfa1bf8..f369fb0d3d82 100644 --- a/drivers/platform/x86/intel/ifs/ifs.h +++ b/drivers/platform/x86/intel/ifs/ifs.h @@ -23,12 +23,14 @@ * IFS Image * --------- * - * Intel provides a firmware file containing the scan tests via - * github [#f1]_. Similar to microcode there is a separate file for each + * Intel provides firmware files containing the scan tests via the webpage [#f1]_. + * Look under "In-Field Scan Test Images Download" section towards the + * end of the page. Similar to microcode, there are separate files for each * family-model-stepping. IFS Images are not applicable for some test types. * Wherever applicable the sysfs directory would provide a "current_batch" file * (see below) for loading the image. * + * .. [#f1] https://intel.com/InFieldScan * * IFS Image Loading * ----------------- @@ -125,9 +127,6 @@ * 2) Hardware allows for some number of cores to be tested in parallel. * The driver does not make use of this, it only tests one core at a time. * - * .. [#f1] https://github.com/intel/TBD - * - * * Structural Based Functional Test at Field (SBAF): * ------------------------------------------------- * diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 31015ebe20d8..092252eb95a8 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -2,6 +2,7 @@ /* Author: Dan Scally <djrscally@gmail.com> */ #include <linux/acpi.h> +#include <linux/array_size.h> #include <linux/bitfield.h> #include <linux/device.h> #include <linux/gpio/consumer.h> @@ -55,7 +56,7 @@ static void skl_int3472_log_sensor_module_name(struct int3472_discrete_device *i static int skl_int3472_fill_gpiod_lookup(struct gpiod_lookup *table_entry, struct acpi_resource_gpio *agpio, - const char *func, u32 polarity) + const char *func, unsigned long gpio_flags) { char *path = agpio->resource_source.string_ptr; struct acpi_device *adev; @@ -70,14 +71,14 @@ static int skl_int3472_fill_gpiod_lookup(struct gpiod_lookup *table_entry, if (!adev) return -ENODEV; - *table_entry = GPIO_LOOKUP(acpi_dev_name(adev), agpio->pin_table[0], func, polarity); + *table_entry = GPIO_LOOKUP(acpi_dev_name(adev), agpio->pin_table[0], func, gpio_flags); return 0; } static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int3472, struct acpi_resource_gpio *agpio, - const char *func, u32 polarity) + const char *func, unsigned long gpio_flags) { int ret; @@ -87,7 +88,7 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347 } ret = skl_int3472_fill_gpiod_lookup(&int3472->gpios.table[int3472->n_sensor_gpios], - agpio, func, polarity); + agpio, func, gpio_flags); if (ret) return ret; @@ -100,7 +101,7 @@ static int skl_int3472_map_gpio_to_sensor(struct int3472_discrete_device *int347 static struct gpio_desc * skl_int3472_gpiod_get_from_temp_lookup(struct int3472_discrete_device *int3472, struct acpi_resource_gpio *agpio, - const char *func, u32 polarity) + const char *func, unsigned long gpio_flags) { struct gpio_desc *desc; int ret; @@ -111,7 +112,7 @@ skl_int3472_gpiod_get_from_temp_lookup(struct int3472_discrete_device *int3472, return ERR_PTR(-ENOMEM); lookup->dev_id = dev_name(int3472->dev); - ret = skl_int3472_fill_gpiod_lookup(&lookup->table[0], agpio, func, polarity); + ret = skl_int3472_fill_gpiod_lookup(&lookup->table[0], agpio, func, gpio_flags); if (ret) return ERR_PTR(ret); @@ -122,32 +123,76 @@ skl_int3472_gpiod_get_from_temp_lookup(struct int3472_discrete_device *int3472, return desc; } -static void int3472_get_func_and_polarity(u8 type, const char **func, u32 *polarity) +/** + * struct int3472_gpio_map - Map GPIOs to whatever is expected by the + * sensor driver (as in DT bindings) + * @hid: The ACPI HID of the device without the instance number e.g. INT347E + * @type_from: The GPIO type from ACPI ?SDT + * @type_to: The assigned GPIO type, typically same as @type_from + * @func: The function, e.g. "enable" + * @polarity_low: GPIO_ACTIVE_LOW true if the @polarity_low is true, + * GPIO_ACTIVE_HIGH otherwise + */ +struct int3472_gpio_map { + const char *hid; + u8 type_from; + u8 type_to; + bool polarity_low; + const char *func; +}; + +static const struct int3472_gpio_map int3472_gpio_map[] = { + { "INT347E", INT3472_GPIO_TYPE_RESET, INT3472_GPIO_TYPE_RESET, false, "enable" }, +}; + +static void int3472_get_func_and_polarity(struct acpi_device *adev, u8 *type, + const char **func, unsigned long *gpio_flags) { - switch (type) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(int3472_gpio_map); i++) { + /* + * Map the firmware-provided GPIO to whatever a driver expects + * (as in DT bindings). First check if the type matches with the + * GPIO map, then further check that the device _HID matches. + */ + if (*type != int3472_gpio_map[i].type_from) + continue; + + if (!acpi_dev_hid_uid_match(adev, int3472_gpio_map[i].hid, NULL)) + continue; + + *type = int3472_gpio_map[i].type_to; + *gpio_flags = int3472_gpio_map[i].polarity_low ? + GPIO_ACTIVE_LOW : GPIO_ACTIVE_HIGH; + *func = int3472_gpio_map[i].func; + return; + } + + switch (*type) { case INT3472_GPIO_TYPE_RESET: *func = "reset"; - *polarity = GPIO_ACTIVE_LOW; + *gpio_flags = GPIO_ACTIVE_LOW; break; case INT3472_GPIO_TYPE_POWERDOWN: *func = "powerdown"; - *polarity = GPIO_ACTIVE_LOW; + *gpio_flags = GPIO_ACTIVE_LOW; break; case INT3472_GPIO_TYPE_CLK_ENABLE: *func = "clk-enable"; - *polarity = GPIO_ACTIVE_HIGH; + *gpio_flags = GPIO_ACTIVE_HIGH; break; case INT3472_GPIO_TYPE_PRIVACY_LED: *func = "privacy-led"; - *polarity = GPIO_ACTIVE_HIGH; + *gpio_flags = GPIO_ACTIVE_HIGH; break; case INT3472_GPIO_TYPE_POWER_ENABLE: *func = "power-enable"; - *polarity = GPIO_ACTIVE_HIGH; + *gpio_flags = GPIO_ACTIVE_HIGH; break; default: *func = "unknown"; - *polarity = GPIO_ACTIVE_HIGH; + *gpio_flags = GPIO_ACTIVE_HIGH; break; } } @@ -194,7 +239,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, struct gpio_desc *gpio; const char *err_msg; const char *func; - u32 polarity; + unsigned long gpio_flags; int ret; if (!acpi_gpio_get_io_resource(ares, &agpio)) @@ -217,7 +262,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, type = FIELD_GET(INT3472_GPIO_DSM_TYPE, obj->integer.value); - int3472_get_func_and_polarity(type, &func, &polarity); + int3472_get_func_and_polarity(int3472->sensor, &type, &func, &gpio_flags); pin = FIELD_GET(INT3472_GPIO_DSM_PIN, obj->integer.value); /* Pin field is not really used under Windows and wraps around at 8 bits */ @@ -227,16 +272,16 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, active_value = FIELD_GET(INT3472_GPIO_DSM_SENSOR_ON_VAL, obj->integer.value); if (!active_value) - polarity ^= GPIO_ACTIVE_LOW; + gpio_flags ^= GPIO_ACTIVE_LOW; dev_dbg(int3472->dev, "%s %s pin %d active-%s\n", func, agpio->resource_source.string_ptr, agpio->pin_table[0], - str_high_low(polarity == GPIO_ACTIVE_HIGH)); + str_high_low(gpio_flags == GPIO_ACTIVE_HIGH)); switch (type) { case INT3472_GPIO_TYPE_RESET: case INT3472_GPIO_TYPE_POWERDOWN: - ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, func, polarity); + ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, func, gpio_flags); if (ret) err_msg = "Failed to map GPIO pin to sensor\n"; @@ -244,7 +289,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, case INT3472_GPIO_TYPE_CLK_ENABLE: case INT3472_GPIO_TYPE_PRIVACY_LED: case INT3472_GPIO_TYPE_POWER_ENABLE: - gpio = skl_int3472_gpiod_get_from_temp_lookup(int3472, agpio, func, polarity); + gpio = skl_int3472_gpiod_get_from_temp_lookup(int3472, agpio, func, gpio_flags); if (IS_ERR(gpio)) { ret = PTR_ERR(gpio); err_msg = "Failed to get GPIO\n"; diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 10f04b944117..1ee0fb5f8250 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -626,8 +626,8 @@ static u32 convert_ltr_scale(u32 val) static int pmc_core_ltr_show(struct seq_file *s, void *unused) { struct pmc_dev *pmcdev = s->private; - u64 decoded_snoop_ltr, decoded_non_snoop_ltr; - u32 ltr_raw_data, scale, val; + u64 decoded_snoop_ltr, decoded_non_snoop_ltr, val; + u32 ltr_raw_data, scale; u16 snoop_ltr, nonsnoop_ltr; unsigned int i, index, ltr_index = 0; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 1fcb0f99695a..72a10ed2017c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -7885,6 +7885,7 @@ static struct ibm_struct volume_driver_data = { #define FAN_NS_CTRL_STATUS BIT(2) /* Bit which determines control is enabled or not */ #define FAN_NS_CTRL BIT(4) /* Bit which determines control is by host or EC */ +#define FAN_CLOCK_TPM (22500*60) /* Ticks per minute for a 22.5 kHz clock */ enum { /* Fan control constants */ fan_status_offset = 0x2f, /* EC register 0x2f */ @@ -7940,6 +7941,7 @@ static int fan_watchdog_maxinterval; static bool fan_with_ns_addr; static bool ecfw_with_fan_dec_rpm; +static bool fan_speed_in_tpr; static struct mutex fan_mutex; @@ -8142,8 +8144,11 @@ static int fan_get_speed(unsigned int *speed) !acpi_ec_read(fan_rpm_offset + 1, &hi))) return -EIO; - if (likely(speed)) + if (likely(speed)) { *speed = (hi << 8) | lo; + if (fan_speed_in_tpr && *speed != 0) + *speed = FAN_CLOCK_TPM / *speed; + } break; case TPACPI_FAN_RD_TPEC_NS: if (!acpi_ec_read(fan_rpm_status_ns, &lo)) @@ -8176,8 +8181,11 @@ static int fan2_get_speed(unsigned int *speed) if (rc) return -EIO; - if (likely(speed)) + if (likely(speed)) { *speed = (hi << 8) | lo; + if (fan_speed_in_tpr && *speed != 0) + *speed = FAN_CLOCK_TPM / *speed; + } break; case TPACPI_FAN_RD_TPEC_NS: @@ -8788,6 +8796,7 @@ static const struct attribute_group fan_driver_attr_group = { #define TPACPI_FAN_NOFAN 0x0008 /* no fan available */ #define TPACPI_FAN_NS 0x0010 /* For EC with non-Standard register addresses */ #define TPACPI_FAN_DECRPM 0x0020 /* For ECFW's with RPM in register as decimal */ +#define TPACPI_FAN_TPR 0x0040 /* Fan speed is in Ticks Per Revolution */ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1), @@ -8817,6 +8826,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('R', '0', 'V', TPACPI_FAN_NS), /* 11e Gen5 KL-Y */ TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ TPACPI_Q_LNV3('R', '0', 'Q', TPACPI_FAN_DECRPM),/* L480 */ + TPACPI_Q_LNV('8', 'F', TPACPI_FAN_TPR), /* ThinkPad x120e */ }; static int __init fan_init(struct ibm_init_struct *iibm) @@ -8887,6 +8897,8 @@ static int __init fan_init(struct ibm_init_struct *iibm) if (quirks & TPACPI_FAN_Q1) fan_quirk1_setup(); + if (quirks & TPACPI_FAN_TPR) + fan_speed_in_tpr = true; /* Try and probe the 2nd fan */ tp_features.second_fan = 1; /* needed for get_speed to work */ res = fan2_get_speed(&speed); @@ -10319,6 +10331,10 @@ static struct ibm_struct proxsensor_driver_data = { #define DYTC_MODE_PSC_BALANCE 5 /* Default mode aka balanced */ #define DYTC_MODE_PSC_PERFORM 7 /* High power mode aka performance */ +#define DYTC_MODE_PSCV9_LOWPOWER 1 /* Low power mode */ +#define DYTC_MODE_PSCV9_BALANCE 3 /* Default mode aka balanced */ +#define DYTC_MODE_PSCV9_PERFORM 4 /* High power mode aka performance */ + #define DYTC_ERR_MASK 0xF /* Bits 0-3 in cmd result are the error result */ #define DYTC_ERR_SUCCESS 1 /* CMD completed successful */ @@ -10339,6 +10355,10 @@ static int dytc_capabilities; static bool dytc_mmc_get_available; static int profile_force; +static int platform_psc_profile_lowpower = DYTC_MODE_PSC_LOWPOWER; +static int platform_psc_profile_balanced = DYTC_MODE_PSC_BALANCE; +static int platform_psc_profile_performance = DYTC_MODE_PSC_PERFORM; + static int convert_dytc_to_profile(int funcmode, int dytcmode, enum platform_profile_option *profile) { @@ -10360,19 +10380,15 @@ static int convert_dytc_to_profile(int funcmode, int dytcmode, } return 0; case DYTC_FUNCTION_PSC: - switch (dytcmode) { - case DYTC_MODE_PSC_LOWPOWER: + if (dytcmode == platform_psc_profile_lowpower) *profile = PLATFORM_PROFILE_LOW_POWER; - break; - case DYTC_MODE_PSC_BALANCE: + else if (dytcmode == platform_psc_profile_balanced) *profile = PLATFORM_PROFILE_BALANCED; - break; - case DYTC_MODE_PSC_PERFORM: + else if (dytcmode == platform_psc_profile_performance) *profile = PLATFORM_PROFILE_PERFORMANCE; - break; - default: /* Unknown mode */ + else return -EINVAL; - } + return 0; case DYTC_FUNCTION_AMT: /* For now return balanced. It's the closest we have to 'auto' */ @@ -10393,19 +10409,19 @@ static int convert_profile_to_dytc(enum platform_profile_option profile, int *pe if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_LOWPOWER; else if (dytc_capabilities & BIT(DYTC_FC_PSC)) - *perfmode = DYTC_MODE_PSC_LOWPOWER; + *perfmode = platform_psc_profile_lowpower; break; case PLATFORM_PROFILE_BALANCED: if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_BALANCE; else if (dytc_capabilities & BIT(DYTC_FC_PSC)) - *perfmode = DYTC_MODE_PSC_BALANCE; + *perfmode = platform_psc_profile_balanced; break; case PLATFORM_PROFILE_PERFORMANCE: if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_PERFORM; else if (dytc_capabilities & BIT(DYTC_FC_PSC)) - *perfmode = DYTC_MODE_PSC_PERFORM; + *perfmode = platform_psc_profile_performance; break; default: /* Unknown profile */ return -EOPNOTSUPP; @@ -10599,6 +10615,7 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) if (output & BIT(DYTC_QUERY_ENABLE_BIT)) dytc_version = (output >> DYTC_QUERY_REV_BIT) & 0xF; + dbg_printk(TPACPI_DBG_INIT, "DYTC version %d\n", dytc_version); /* Check DYTC is enabled and supports mode setting */ if (dytc_version < 5) return -ENODEV; @@ -10637,6 +10654,11 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) } } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */ pr_debug("PSC is supported\n"); + if (dytc_version >= 9) { /* update profiles for DYTC 9 and up */ + platform_psc_profile_lowpower = DYTC_MODE_PSCV9_LOWPOWER; + platform_psc_profile_balanced = DYTC_MODE_PSCV9_BALANCE; + platform_psc_profile_performance = DYTC_MODE_PSCV9_PERFORM; + } } else { dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n"); return -ENODEV; @@ -10646,8 +10668,8 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) "DYTC version %d: thermal mode available\n", dytc_version); /* Create platform_profile structure and register */ - tpacpi_pprof = devm_platform_profile_register(&tpacpi_pdev->dev, "thinkpad-acpi", - NULL, &dytc_profile_ops); + tpacpi_pprof = platform_profile_register(&tpacpi_pdev->dev, "thinkpad-acpi-profile", + NULL, &dytc_profile_ops); /* * If for some reason platform_profiles aren't enabled * don't quit terminally. @@ -10665,8 +10687,15 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) return 0; } +static void dytc_profile_exit(void) +{ + if (!IS_ERR_OR_NULL(tpacpi_pprof)) + platform_profile_remove(tpacpi_pprof); +} + static struct ibm_struct dytc_profile_driver_data = { .name = "dytc-profile", + .exit = dytc_profile_exit, }; /************************************************************************* diff --git a/drivers/power/supply/axp20x_battery.c b/drivers/power/supply/axp20x_battery.c index fa27195f074e..3c3158f31a48 100644 --- a/drivers/power/supply/axp20x_battery.c +++ b/drivers/power/supply/axp20x_battery.c @@ -466,10 +466,9 @@ static int axp717_battery_get_prop(struct power_supply *psy, /* * If a fault is detected it must also be cleared; if the - * condition persists it should reappear (This is an - * assumption, it's actually not documented). A restart was - * not sufficient to clear the bit in testing despite the - * register listed as POR. + * condition persists it should reappear. A restart was not + * sufficient to clear the bit in testing despite the register + * listed as POR. */ case POWER_SUPPLY_PROP_HEALTH: ret = regmap_read(axp20x_batt->regmap, AXP717_PMU_FAULT, @@ -480,26 +479,26 @@ static int axp717_battery_get_prop(struct power_supply *psy, switch (reg & AXP717_BATT_PMU_FAULT_MASK) { case AXP717_BATT_UVLO_2_5V: val->intval = POWER_SUPPLY_HEALTH_DEAD; - regmap_update_bits(axp20x_batt->regmap, - AXP717_PMU_FAULT, - AXP717_BATT_UVLO_2_5V, - AXP717_BATT_UVLO_2_5V); + regmap_write_bits(axp20x_batt->regmap, + AXP717_PMU_FAULT, + AXP717_BATT_UVLO_2_5V, + AXP717_BATT_UVLO_2_5V); return 0; case AXP717_BATT_OVER_TEMP: val->intval = POWER_SUPPLY_HEALTH_HOT; - regmap_update_bits(axp20x_batt->regmap, - AXP717_PMU_FAULT, - AXP717_BATT_OVER_TEMP, - AXP717_BATT_OVER_TEMP); + regmap_write_bits(axp20x_batt->regmap, + AXP717_PMU_FAULT, + AXP717_BATT_OVER_TEMP, + AXP717_BATT_OVER_TEMP); return 0; case AXP717_BATT_UNDER_TEMP: val->intval = POWER_SUPPLY_HEALTH_COLD; - regmap_update_bits(axp20x_batt->regmap, - AXP717_PMU_FAULT, - AXP717_BATT_UNDER_TEMP, - AXP717_BATT_UNDER_TEMP); + regmap_write_bits(axp20x_batt->regmap, + AXP717_PMU_FAULT, + AXP717_BATT_UNDER_TEMP, + AXP717_BATT_UNDER_TEMP); return 0; default: diff --git a/drivers/power/supply/da9150-fg.c b/drivers/power/supply/da9150-fg.c index 652c1f213af1..4f28ef1bba1a 100644 --- a/drivers/power/supply/da9150-fg.c +++ b/drivers/power/supply/da9150-fg.c @@ -247,9 +247,9 @@ static int da9150_fg_current_avg(struct da9150_fg *fg, DA9150_QIF_SD_GAIN_SIZE); da9150_fg_read_sync_end(fg); - div = (u64) (sd_gain * shunt_val * 65536ULL); + div = 65536ULL * sd_gain * shunt_val; do_div(div, 1000000); - res = (u64) (iavg * 1000000ULL); + res = 1000000ULL * iavg; do_div(res, div); val->intval = (int) res; diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index d0bb52a7a036..76c340b38015 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1592,11 +1592,11 @@ __power_supply_register(struct device *parent, if (rc) goto register_thermal_failed; - scoped_guard(rwsem_read, &psy->extensions_sem) { - rc = power_supply_create_triggers(psy); - if (rc) - goto create_triggers_failed; + rc = power_supply_create_triggers(psy); + if (rc) + goto create_triggers_failed; + scoped_guard(rwsem_read, &psy->extensions_sem) { rc = power_supply_add_hwmon_sysfs(psy); if (rc) goto add_hwmon_sysfs_failed; diff --git a/drivers/powercap/powercap_sys.c b/drivers/powercap/powercap_sys.c index 52c32dcbf7d8..4112a0097338 100644 --- a/drivers/powercap/powercap_sys.c +++ b/drivers/powercap/powercap_sys.c @@ -627,8 +627,7 @@ struct powercap_control_type *powercap_register_control_type( dev_set_name(&control_type->dev, "%s", name); result = device_register(&control_type->dev); if (result) { - if (control_type->allocated) - kfree(control_type); + put_device(&control_type->dev); return ERR_PTR(result); } idr_init(&control_type->idr); diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 0a2cfc8ad3c5..b3a83b03d9c1 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -414,16 +414,16 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, } static const struct file_operations vmclock_miscdev_fops = { + .owner = THIS_MODULE, .mmap = vmclock_miscdev_mmap, .read = vmclock_miscdev_read, }; /* module operations */ -static void vmclock_remove(struct platform_device *pdev) +static void vmclock_remove(void *data) { - struct device *dev = &pdev->dev; - struct vmclock_state *st = dev_get_drvdata(dev); + struct vmclock_state *st = data; if (st->ptp_clock) ptp_clock_unregister(st->ptp_clock); @@ -506,14 +506,13 @@ static int vmclock_probe(struct platform_device *pdev) if (ret) { dev_info(dev, "Failed to obtain physical address: %d\n", ret); - goto out; + return ret; } if (resource_size(&st->res) < VMCLOCK_MIN_SIZE) { dev_info(dev, "Region too small (0x%llx)\n", resource_size(&st->res)); - ret = -EINVAL; - goto out; + return -EINVAL; } st->clk = devm_memremap(dev, st->res.start, resource_size(&st->res), MEMREMAP_WB | MEMREMAP_DEC); @@ -521,31 +520,34 @@ static int vmclock_probe(struct platform_device *pdev) ret = PTR_ERR(st->clk); dev_info(dev, "failed to map shared memory\n"); st->clk = NULL; - goto out; + return ret; } if (le32_to_cpu(st->clk->magic) != VMCLOCK_MAGIC || le32_to_cpu(st->clk->size) > resource_size(&st->res) || le16_to_cpu(st->clk->version) != 1) { dev_info(dev, "vmclock magic fields invalid\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } ret = ida_alloc(&vmclock_ida, GFP_KERNEL); if (ret < 0) - goto out; + return ret; st->index = ret; ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st); if (ret) - goto out; + return ret; st->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "vmclock%d", st->index); - if (!st->name) { - ret = -ENOMEM; - goto out; - } + if (!st->name) + return -ENOMEM; + + st->miscdev.minor = MISC_DYNAMIC_MINOR; + + ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, st); + if (ret) + return ret; /* * If the structure is big enough, it can be mapped to userspace. @@ -554,13 +556,12 @@ static int vmclock_probe(struct platform_device *pdev) * cross that bridge if/when we come to it. */ if (le32_to_cpu(st->clk->size) >= PAGE_SIZE) { - st->miscdev.minor = MISC_DYNAMIC_MINOR; st->miscdev.fops = &vmclock_miscdev_fops; st->miscdev.name = st->name; ret = misc_register(&st->miscdev); if (ret) - goto out; + return ret; } /* If there is valid clock information, register a PTP clock */ @@ -570,16 +571,14 @@ static int vmclock_probe(struct platform_device *pdev) if (IS_ERR(st->ptp_clock)) { ret = PTR_ERR(st->ptp_clock); st->ptp_clock = NULL; - vmclock_remove(pdev); - goto out; + return ret; } } if (!st->miscdev.minor && !st->ptp_clock) { /* Neither miscdev nor PTP registered */ dev_info(dev, "vmclock: Neither miscdev nor PTP available; not registering\n"); - ret = -ENODEV; - goto out; + return -ENODEV; } dev_info(dev, "%s: registered %s%s%s\n", st->name, @@ -587,10 +586,7 @@ static int vmclock_probe(struct platform_device *pdev) (st->miscdev.minor && st->ptp_clock) ? ", " : "", st->ptp_clock ? "PTP" : ""); - dev_set_drvdata(dev, st); - - out: - return ret; + return 0; } static const struct acpi_device_id vmclock_acpi_ids[] = { @@ -601,7 +597,6 @@ MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); static struct platform_driver vmclock_platform_driver = { .probe = vmclock_probe, - .remove = vmclock_remove, .driver = { .name = "vmclock", .acpi_match_table = vmclock_acpi_ids, diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 89578b91c468..4ddf0efead68 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5774,43 +5774,36 @@ regulator_register(struct device *dev, goto clean; } - if (config->init_data) { - /* - * Providing of_match means the framework is expected to parse - * DT to get the init_data. This would conflict with provided - * init_data, if set. Warn if it happens. - */ - if (regulator_desc->of_match) - dev_warn(dev, "Using provided init data - OF match ignored\n"); + /* + * DT may override the config->init_data provided if the platform + * needs to do so. If so, config->init_data is completely ignored. + */ + init_data = regulator_of_get_init_data(dev, regulator_desc, config, + &rdev->dev.of_node); + /* + * Sometimes not all resources are probed already so we need to take + * that into account. This happens most the time if the ena_gpiod comes + * from a gpio extender or something else. + */ + if (PTR_ERR(init_data) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto clean; + } + + /* + * We need to keep track of any GPIO descriptor coming from the + * device tree until we have handled it over to the core. If the + * config that was passed in to this function DOES NOT contain + * a descriptor, and the config after this call DOES contain + * a descriptor, we definitely got one from parsing the device + * tree. + */ + if (!cfg->ena_gpiod && config->ena_gpiod) + dangling_of_gpiod = true; + if (!init_data) { init_data = config->init_data; rdev->dev.of_node = of_node_get(config->of_node); - - } else { - init_data = regulator_of_get_init_data(dev, regulator_desc, - config, - &rdev->dev.of_node); - - /* - * Sometimes not all resources are probed already so we need to - * take that into account. This happens most the time if the - * ena_gpiod comes from a gpio extender or something else. - */ - if (PTR_ERR(init_data) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; - goto clean; - } - - /* - * We need to keep track of any GPIO descriptor coming from the - * device tree until we have handled it over to the core. If the - * config that was passed in to this function DOES NOT contain a - * descriptor, and the config after this call DOES contain a - * descriptor, we definitely got one from parsing the device - * tree. - */ - if (!cfg->ena_gpiod && config->ena_gpiod) - dangling_of_gpiod = true; } ww_mutex_init(&rdev->mutex, ®ulator_ww_class); diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 4a0b3f19bd8e..4f01b1929240 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -695,7 +695,8 @@ static int info_update(void) if (time_after(jiffies, chp_info_expires)) { /* Data is too old, update. */ rc = sclp_chp_read_info(&chp_info); - chp_info_expires = jiffies + CHP_INFO_UPDATE_INTERVAL ; + if (!rc) + chp_info_expires = jiffies + CHP_INFO_UPDATE_INTERVAL; } mutex_unlock(&info_lock); diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c index e36e3ea165d3..2f34761e6413 100644 --- a/drivers/s390/net/ism_drv.c +++ b/drivers/s390/net/ism_drv.c @@ -588,6 +588,15 @@ out: return ret; } +static void ism_dev_release(struct device *dev) +{ + struct ism_dev *ism; + + ism = container_of(dev, struct ism_dev, dev); + + kfree(ism); +} + static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct ism_dev *ism; @@ -601,6 +610,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_set_drvdata(&pdev->dev, ism); ism->pdev = pdev; ism->dev.parent = &pdev->dev; + ism->dev.release = ism_dev_release; device_initialize(&ism->dev); dev_set_name(&ism->dev, dev_name(&pdev->dev)); ret = device_add(&ism->dev); @@ -637,7 +647,7 @@ err: device_del(&ism->dev); err_dev: dev_set_drvdata(&pdev->dev, NULL); - kfree(ism); + put_device(&ism->dev); return ret; } @@ -682,7 +692,7 @@ static void ism_remove(struct pci_dev *pdev) pci_disable_device(pdev); device_del(&ism->dev); dev_set_drvdata(&pdev->dev, NULL); - kfree(ism); + put_device(&ism->dev); } static struct pci_driver ism_driver = { diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index a3adaec5504e..20328d695ef9 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -7050,14 +7050,16 @@ int qeth_open(struct net_device *dev) card->data.state = CH_STATE_UP; netif_tx_start_all_queues(dev); - local_bh_disable(); qeth_for_each_output_queue(card, queue, i) { netif_napi_add_tx(dev, &queue->napi, qeth_tx_poll); napi_enable(&queue->napi); - napi_schedule(&queue->napi); } - napi_enable(&card->napi); + + local_bh_disable(); + qeth_for_each_output_queue(card, queue, i) { + napi_schedule(&queue->napi); + } napi_schedule(&card->napi); /* kick-start the NAPI softirq: */ local_bh_enable(); diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 1fd2da0264e3..47d74f881948 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -2867,7 +2867,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) dprintk(3, "S/G Segment phys_addr=%x %x, len=0x%x\n", cpu_to_le32(upper_32_bits(dma_handle)), cpu_to_le32(lower_32_bits(dma_handle)), - cpu_to_le32(sg_dma_len(sg_next(s)))); + cpu_to_le32(sg_dma_len(s))); remseg--; } dprintk(5, "qla1280_64bit_start_scsi: Scatter/gather " diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d776f13cd160..f1cfe0bb89b2 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -872,13 +872,18 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result) case 0x1a: /* start stop unit in progress */ case 0x1b: /* sanitize in progress */ case 0x1d: /* configuration in progress */ - case 0x24: /* depopulation in progress */ - case 0x25: /* depopulation restore in progress */ action = ACTION_DELAYED_RETRY; break; case 0x0a: /* ALUA state transition */ action = ACTION_DELAYED_REPREP; break; + /* + * Depopulation might take many hours, + * thus it is not worthwhile to retry. + */ + case 0x24: /* depopulation in progress */ + case 0x25: /* depopulation restore in progress */ + fallthrough; default: action = ACTION_FAIL; break; @@ -1664,13 +1669,6 @@ static blk_status_t scsi_prepare_cmd(struct request *req) if (in_flight) __set_bit(SCMD_STATE_INFLIGHT, &cmd->state); - /* - * Only clear the driver-private command data if the LLD does not supply - * a function to initialize that data. - */ - if (!shost->hostt->init_cmd_priv) - memset(cmd + 1, 0, shost->hostt->cmd_size); - cmd->prot_op = SCSI_PROT_NORMAL; if (blk_rq_bytes(req)) cmd->sc_data_direction = rq_dma_dir(req); @@ -1837,6 +1835,13 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (!scsi_host_queue_ready(q, shost, sdev, cmd)) goto out_dec_target_busy; + /* + * Only clear the driver-private command data if the LLD does not supply + * a function to initialize that data. + */ + if (shost->hostt->cmd_size && !shost->hostt->init_cmd_priv) + memset(cmd + 1, 0, shost->hostt->cmd_size); + if (!(req->rq_flags & RQF_DONTPREP)) { ret = scsi_prepare_cmd(req); if (ret != BLK_STS_OK) diff --git a/drivers/scsi/scsi_lib_test.c b/drivers/scsi/scsi_lib_test.c index 99834426a100..ae8af0e0047a 100644 --- a/drivers/scsi/scsi_lib_test.c +++ b/drivers/scsi/scsi_lib_test.c @@ -67,6 +67,13 @@ static void scsi_lib_test_multiple_sense(struct kunit *test) }; int i; + /* Success */ + sc.result = 0; + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, &failures)); + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, NULL)); + /* Command failed but caller did not pass in a failures array */ + scsi_build_sense(&sc, 0, ILLEGAL_REQUEST, 0x91, 0x36); + KUNIT_EXPECT_EQ(test, 0, scsi_check_passthrough(&sc, NULL)); /* Match end of array */ scsi_build_sense(&sc, 0, ILLEGAL_REQUEST, 0x91, 0x36); KUNIT_EXPECT_EQ(test, -EAGAIN, scsi_check_passthrough(&sc, &failures)); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 087fcbfc9aaa..96d7e1a9a7c7 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -246,7 +246,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, } ret = sbitmap_init_node(&sdev->budget_map, scsi_device_max_queue_depth(sdev), - new_shift, GFP_KERNEL, + new_shift, GFP_NOIO, sdev->request_queue->node, false, true); if (!ret) sbitmap_resize(&sdev->budget_map, depth); diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 5a101ac06c47..a8614e54544e 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1800,6 +1800,7 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) length = scsi_bufflen(scmnd); payload = (struct vmbus_packet_mpb_array *)&cmd_request->mpb; + payload->range.len = 0; payload_sz = 0; if (scsi_sg_count(scmnd)) { diff --git a/drivers/soc/loongson/loongson2_guts.c b/drivers/soc/loongson/loongson2_guts.c index ae42e3a9127f..16913c3ef65c 100644 --- a/drivers/soc/loongson/loongson2_guts.c +++ b/drivers/soc/loongson/loongson2_guts.c @@ -114,8 +114,11 @@ static int loongson2_guts_probe(struct platform_device *pdev) if (of_property_read_string(root, "model", &machine)) of_property_read_string_index(root, "compatible", 0, &machine); of_node_put(root); - if (machine) + if (machine) { soc_dev_attr.machine = devm_kstrdup(dev, machine, GFP_KERNEL); + if (!soc_dev_attr.machine) + return -ENOMEM; + } svr = loongson2_guts_get_svr(); soc_die = loongson2_soc_die_match(svr, loongson2_soc_die); diff --git a/drivers/soc/qcom/smp2p.c b/drivers/soc/qcom/smp2p.c index 4783ab1adb8d..a3e88ced328a 100644 --- a/drivers/soc/qcom/smp2p.c +++ b/drivers/soc/qcom/smp2p.c @@ -365,7 +365,7 @@ static void smp2p_irq_print_chip(struct irq_data *irqd, struct seq_file *p) { struct smp2p_entry *entry = irq_data_get_irq_chip_data(irqd); - seq_printf(p, " %8s", dev_name(entry->smp2p->dev)); + seq_printf(p, "%8s", dev_name(entry->smp2p->dev)); } static struct irq_chip smp2p_irq_chip = { diff --git a/drivers/spi/atmel-quadspi.c b/drivers/spi/atmel-quadspi.c index abdc49d9d940..d8c9be64d006 100644 --- a/drivers/spi/atmel-quadspi.c +++ b/drivers/spi/atmel-quadspi.c @@ -235,8 +235,8 @@ /** * struct atmel_qspi_pcal - Pad Calibration Clock Division * @pclk_rate: peripheral clock rate. - * @pclkdiv: calibration clock division. The clock applied to the calibration - * cell is divided by pclkdiv + 1. + * @pclk_div: calibration clock division. The clock applied to the calibration + * cell is divided by pclk_div + 1. */ struct atmel_qspi_pcal { u32 pclk_rate; diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 5f9cac41baff..06711a62fa3d 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -399,7 +399,7 @@ static void lpss_ssp_cs_control(struct spi_device *spi, bool enable) lpss_ssp_select_cs(spi, config); mask = LPSS_CS_CONTROL_CS_HIGH; - __lpss_ssp_update_priv(drv_data, config->reg_cs_ctrl, mask, enable ? mask : 0); + __lpss_ssp_update_priv(drv_data, config->reg_cs_ctrl, mask, enable ? 0 : mask); if (config->cs_clk_stays_gated) { /* * Changing CS alone when dynamic clock gating is on won't diff --git a/drivers/spi/spi-sn-f-ospi.c b/drivers/spi/spi-sn-f-ospi.c index 6ad4b729897e..c4969f66a0ba 100644 --- a/drivers/spi/spi-sn-f-ospi.c +++ b/drivers/spi/spi-sn-f-ospi.c @@ -116,6 +116,9 @@ struct f_ospi { static u32 f_ospi_get_dummy_cycle(const struct spi_mem_op *op) { + if (!op->dummy.nbytes) + return 0; + return (op->dummy.nbytes * 8) / op->dummy.buswidth; } diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c index c42cbde8a31b..210648a0092e 100644 --- a/drivers/target/target_core_stat.c +++ b/drivers/target/target_core_stat.c @@ -117,9 +117,9 @@ static ssize_t target_stat_tgt_status_show(struct config_item *item, char *page) { if (to_stat_tgt_dev(item)->export_count) - return snprintf(page, PAGE_SIZE, "activated"); + return snprintf(page, PAGE_SIZE, "activated\n"); else - return snprintf(page, PAGE_SIZE, "deactivated"); + return snprintf(page, PAGE_SIZE, "deactivated\n"); } static ssize_t target_stat_tgt_non_access_lus_show(struct config_item *item, diff --git a/drivers/tee/optee/supp.c b/drivers/tee/optee/supp.c index 322a543b8c27..d0f397c90242 100644 --- a/drivers/tee/optee/supp.c +++ b/drivers/tee/optee/supp.c @@ -80,7 +80,6 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, struct optee *optee = tee_get_drvdata(ctx->teedev); struct optee_supp *supp = &optee->supp; struct optee_supp_req *req; - bool interruptable; u32 ret; /* @@ -111,36 +110,18 @@ u32 optee_supp_thrd_req(struct tee_context *ctx, u32 func, size_t num_params, /* * Wait for supplicant to process and return result, once we've * returned from wait_for_completion(&req->c) successfully we have - * exclusive access again. + * exclusive access again. Allow the wait to be killable such that + * the wait doesn't turn into an indefinite state if the supplicant + * gets hung for some reason. */ - while (wait_for_completion_interruptible(&req->c)) { + if (wait_for_completion_killable(&req->c)) { mutex_lock(&supp->mutex); - interruptable = !supp->ctx; - if (interruptable) { - /* - * There's no supplicant available and since the - * supp->mutex currently is held none can - * become available until the mutex released - * again. - * - * Interrupting an RPC to supplicant is only - * allowed as a way of slightly improving the user - * experience in case the supplicant hasn't been - * started yet. During normal operation the supplicant - * will serve all requests in a timely manner and - * interrupting then wouldn't make sense. - */ - if (req->in_queue) { - list_del(&req->link); - req->in_queue = false; - } + if (req->in_queue) { + list_del(&req->link); + req->in_queue = false; } mutex_unlock(&supp->mutex); - - if (interruptable) { - req->ret = TEEC_ERROR_COMMUNICATION; - break; - } + req->ret = TEEC_ERROR_COMMUNICATION; } ret = req->ret; diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index 280071be30b1..6b7ab1814c12 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -57,8 +57,6 @@ struct time_in_idle { * @max_level: maximum cooling level. One less than total number of valid * cpufreq frequencies. * @em: Reference on the Energy Model of the device - * @cdev: thermal_cooling_device pointer to keep track of the - * registered cooling device. * @policy: cpufreq policy. * @cooling_ops: cpufreq callbacks to thermal cooling device ops * @idle_time: idle time stats diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 3b644de3292e..0d9f636c80f4 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -370,7 +370,7 @@ static void divvy_up_power(struct power_actor *power, int num_actors, for (i = 0; i < num_actors; i++) { struct power_actor *pa = &power[i]; - u64 req_range = (u64)pa->req_power * power_range; + u64 req_range = (u64)pa->weighted_req_power * power_range; pa->granted_power = DIV_ROUND_CLOSEST_ULL(req_range, total_req_power); @@ -641,6 +641,22 @@ clean_state: return ret; } +static void power_allocator_update_weight(struct power_allocator_params *params) +{ + const struct thermal_trip_desc *td; + struct thermal_instance *instance; + + if (!params->trip_max) + return; + + td = trip_to_trip_desc(params->trip_max); + + params->total_weight = 0; + list_for_each_entry(instance, &td->thermal_instances, trip_node) + if (power_actor_is_valid(instance)) + params->total_weight += instance->weight; +} + static void power_allocator_update_tz(struct thermal_zone_device *tz, enum thermal_notify_event reason) { @@ -656,16 +672,12 @@ static void power_allocator_update_tz(struct thermal_zone_device *tz, if (power_actor_is_valid(instance)) num_actors++; - if (num_actors == params->num_actors) - return; + if (num_actors != params->num_actors) + allocate_actors_buffer(params, num_actors); - allocate_actors_buffer(params, num_actors); - break; + fallthrough; case THERMAL_INSTANCE_WEIGHT_CHANGED: - params->total_weight = 0; - list_for_each_entry(instance, &td->thermal_instances, trip_node) - if (power_actor_is_valid(instance)) - params->total_weight += instance->weight; + power_allocator_update_weight(params); break; default: break; @@ -731,6 +743,8 @@ static int power_allocator_bind(struct thermal_zone_device *tz) tz->governor_data = params; + power_allocator_update_weight(params); + return 0; free_params: diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 5ab4ce4daaeb..5401f03d6b6c 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -274,6 +274,34 @@ static bool thermal_of_get_cooling_spec(struct device_node *map_np, int index, return true; } +static bool thermal_of_cm_lookup(struct device_node *cm_np, + const struct thermal_trip *trip, + struct thermal_cooling_device *cdev, + struct cooling_spec *c) +{ + for_each_child_of_node_scoped(cm_np, child) { + struct device_node *tr_np; + int count, i; + + tr_np = of_parse_phandle(child, "trip", 0); + if (tr_np != trip->priv) + continue; + + /* The trip has been found, look up the cdev. */ + count = of_count_phandle_with_args(child, "cooling-device", + "#cooling-cells"); + if (count <= 0) + pr_err("Add a cooling_device property with at least one device\n"); + + for (i = 0; i < count; i++) { + if (thermal_of_get_cooling_spec(child, i, cdev, c)) + return true; + } + } + + return false; +} + static bool thermal_of_should_bind(struct thermal_zone_device *tz, const struct thermal_trip *trip, struct thermal_cooling_device *cdev, @@ -293,27 +321,7 @@ static bool thermal_of_should_bind(struct thermal_zone_device *tz, goto out; /* Look up the trip and the cdev in the cooling maps. */ - for_each_child_of_node_scoped(cm_np, child) { - struct device_node *tr_np; - int count, i; - - tr_np = of_parse_phandle(child, "trip", 0); - if (tr_np != trip->priv) - continue; - - /* The trip has been found, look up the cdev. */ - count = of_count_phandle_with_args(child, "cooling-device", "#cooling-cells"); - if (count <= 0) - pr_err("Add a cooling_device property with at least one device\n"); - - for (i = 0; i < count; i++) { - result = thermal_of_get_cooling_spec(child, i, cdev, c); - if (result) - break; - } - - break; - } + result = thermal_of_cm_lookup(cm_np, trip, cdev, c); of_node_put(cm_np); out: diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index df08f13052ff..8bb1a01fef2a 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -798,7 +798,7 @@ static int ptmx_open(struct inode *inode, struct file *filp) nonseekable_open(inode, filp); /* We refuse fsnotify events on ptmx, since it's a shared resource */ - filp->f_mode |= FMODE_NONOTIFY; + file_set_fsnotify_mode(filp, FMODE_NONOTIFY); retval = tty_alloc_file(filp); if (retval) diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 11e05aa014e5..b861585ca02a 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -374,6 +374,7 @@ static inline int is_omap1510_8250(struct uart_8250_port *pt) #ifdef CONFIG_SERIAL_8250_DMA extern int serial8250_tx_dma(struct uart_8250_port *); +extern void serial8250_tx_dma_flush(struct uart_8250_port *); extern int serial8250_rx_dma(struct uart_8250_port *); extern void serial8250_rx_dma_flush(struct uart_8250_port *); extern int serial8250_request_dma(struct uart_8250_port *); @@ -406,6 +407,7 @@ static inline int serial8250_tx_dma(struct uart_8250_port *p) { return -1; } +static inline void serial8250_tx_dma_flush(struct uart_8250_port *p) { } static inline int serial8250_rx_dma(struct uart_8250_port *p) { return -1; diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c index d215c494ee24..f245a84f4a50 100644 --- a/drivers/tty/serial/8250/8250_dma.c +++ b/drivers/tty/serial/8250/8250_dma.c @@ -149,6 +149,22 @@ err: return ret; } +void serial8250_tx_dma_flush(struct uart_8250_port *p) +{ + struct uart_8250_dma *dma = p->dma; + + if (!dma->tx_running) + return; + + /* + * kfifo_reset() has been called by the serial core, avoid + * advancing and underflowing in __dma_tx_complete(). + */ + dma->tx_size = 0; + + dmaengine_terminate_async(dma->rxchan); +} + int serial8250_rx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 64aed7efc569..11c860ea80f6 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -110,7 +110,6 @@ static int of_platform_serial_setup(struct platform_device *ofdev, spin_lock_init(&port->lock); if (resource_type(&resource) == IORESOURCE_IO) { - port->iotype = UPIO_PORT; port->iobase = resource.start; } else { port->mapbase = resource.start; diff --git a/drivers/tty/serial/8250/8250_platform.c b/drivers/tty/serial/8250/8250_platform.c index 8bdc1879d952..c0343bfb8064 100644 --- a/drivers/tty/serial/8250/8250_platform.c +++ b/drivers/tty/serial/8250/8250_platform.c @@ -112,7 +112,6 @@ static int serial8250_probe_acpi(struct platform_device *pdev) struct device *dev = &pdev->dev; struct uart_8250_port uart = { }; struct resource *regs; - unsigned char iotype; int ret, line; regs = platform_get_mem_or_io(pdev, 0); @@ -122,13 +121,11 @@ static int serial8250_probe_acpi(struct platform_device *pdev) switch (resource_type(regs)) { case IORESOURCE_IO: uart.port.iobase = regs->start; - iotype = UPIO_PORT; break; case IORESOURCE_MEM: uart.port.mapbase = regs->start; uart.port.mapsize = resource_size(regs); uart.port.flags = UPF_IOREMAP; - iotype = UPIO_MEM; break; default: return -EINVAL; @@ -147,12 +144,6 @@ static int serial8250_probe_acpi(struct platform_device *pdev) if (ret) return ret; - /* - * The previous call may not set iotype correctly when reg-io-width - * property is absent and it doesn't support IO port resource. - */ - uart.port.iotype = iotype; - line = serial8250_register_8250_port(&uart); if (line < 0) return line; diff --git a/drivers/tty/serial/8250/8250_pnp.c b/drivers/tty/serial/8250/8250_pnp.c index 7c06ae79d8e2..7a837fdf9df1 100644 --- a/drivers/tty/serial/8250/8250_pnp.c +++ b/drivers/tty/serial/8250/8250_pnp.c @@ -436,7 +436,6 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) { struct uart_8250_port uart, *port; int ret, flags = dev_id->driver_data; - unsigned char iotype; long line; if (flags & UNKNOWN_DEV) { @@ -448,14 +447,11 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) memset(&uart, 0, sizeof(uart)); if ((flags & CIR_PORT) && pnp_port_valid(dev, 2)) { uart.port.iobase = pnp_port_start(dev, 2); - iotype = UPIO_PORT; } else if (pnp_port_valid(dev, 0)) { uart.port.iobase = pnp_port_start(dev, 0); - iotype = UPIO_PORT; } else if (pnp_mem_valid(dev, 0)) { uart.port.mapbase = pnp_mem_start(dev, 0); uart.port.mapsize = pnp_mem_len(dev, 0); - iotype = UPIO_MEM; uart.port.flags = UPF_IOREMAP; } else return -ENODEV; @@ -471,12 +467,6 @@ serial_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *dev_id) if (ret) return ret; - /* - * The previous call may not set iotype correctly when reg-io-width - * property is absent and it doesn't support IO port resource. - */ - uart.port.iotype = iotype; - if (flags & CIR_PORT) { uart.port.flags |= UPF_FIXED_PORT | UPF_FIXED_TYPE; uart.port.type = PORT_8250_CIR; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index d7976a21cca9..442967a6cd52 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2555,6 +2555,14 @@ static void serial8250_shutdown(struct uart_port *port) serial8250_do_shutdown(port); } +static void serial8250_flush_buffer(struct uart_port *port) +{ + struct uart_8250_port *up = up_to_u8250p(port); + + if (up->dma) + serial8250_tx_dma_flush(up); +} + static unsigned int serial8250_do_get_divisor(struct uart_port *port, unsigned int baud, unsigned int *frac) @@ -3244,6 +3252,7 @@ static const struct uart_ops serial8250_pops = { .break_ctl = serial8250_break_ctl, .startup = serial8250_startup, .shutdown = serial8250_shutdown, + .flush_buffer = serial8250_flush_buffer, .set_termios = serial8250_set_termios, .set_ldisc = serial8250_set_ldisc, .pm = serial8250_pm, diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 7b51cdc274fd..560f45ed19ae 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1561,7 +1561,7 @@ int sc16is7xx_probe(struct device *dev, const struct sc16is7xx_devtype *devtype, /* Always ask for fixed clock rate from a property. */ device_property_read_u32(dev, "clock-frequency", &uartclk); - s->polling = !!irq; + s->polling = (irq <= 0); if (s->polling) dev_dbg(dev, "No interrupt pin definition, falling back to polling mode\n"); diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c index d35f1d24156c..2fc48cd63f6c 100644 --- a/drivers/tty/serial/serial_port.c +++ b/drivers/tty/serial/serial_port.c @@ -173,6 +173,7 @@ EXPORT_SYMBOL(uart_remove_one_port); * The caller is responsible to initialize the following fields of the @port * ->dev (must be valid) * ->flags + * ->iobase * ->mapbase * ->mapsize * ->regshift (if @use_defaults is false) @@ -214,7 +215,7 @@ static int __uart_read_properties(struct uart_port *port, bool use_defaults) /* Read the registers I/O access type (default: MMIO 8-bit) */ ret = device_property_read_u32(dev, "reg-io-width", &value); if (ret) { - port->iotype = UPIO_MEM; + port->iotype = port->iobase ? UPIO_PORT : UPIO_MEM; } else { switch (value) { case 1: @@ -227,15 +228,16 @@ static int __uart_read_properties(struct uart_port *port, bool use_defaults) port->iotype = device_is_big_endian(dev) ? UPIO_MEM32BE : UPIO_MEM32; break; default: - if (!use_defaults) { - dev_err(dev, "Unsupported reg-io-width (%u)\n", value); - return -EINVAL; - } port->iotype = UPIO_UNKNOWN; break; } } + if (!use_defaults && port->iotype == UPIO_UNKNOWN) { + dev_err(dev, "Unsupported reg-io-width (%u)\n", value); + return -EINVAL; + } + /* Read the address mapping base offset (default: no offset) */ ret = device_property_read_u32(dev, "reg-offset", &value); if (ret) diff --git a/drivers/ufs/core/ufs_bsg.c b/drivers/ufs/core/ufs_bsg.c index 8d4ad0a3f2cf..252186124669 100644 --- a/drivers/ufs/core/ufs_bsg.c +++ b/drivers/ufs/core/ufs_bsg.c @@ -194,10 +194,12 @@ out: ufshcd_rpm_put_sync(hba); kfree(buff); bsg_reply->result = ret; - job->reply_len = !rpmb ? sizeof(struct ufs_bsg_reply) : sizeof(struct ufs_rpmb_reply); /* complete the job here only if no error */ - if (ret == 0) + if (ret == 0) { + job->reply_len = rpmb ? sizeof(struct ufs_rpmb_reply) : + sizeof(struct ufs_bsg_reply); bsg_job_done(job, ret, bsg_reply->reply_payload_rcv_len); + } return ret; } diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index cd404ade48dc..464f13da259a 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -266,7 +266,7 @@ static bool ufshcd_has_pending_tasks(struct ufs_hba *hba) static bool ufshcd_is_ufs_dev_busy(struct ufs_hba *hba) { - return hba->outstanding_reqs || ufshcd_has_pending_tasks(hba); + return scsi_host_busy(hba->host) || ufshcd_has_pending_tasks(hba); } static const struct ufs_dev_quirk ufs_fixups[] = { @@ -628,8 +628,8 @@ static void ufshcd_print_host_state(struct ufs_hba *hba) const struct scsi_device *sdev_ufs = hba->ufs_device_wlun; dev_err(hba->dev, "UFS Host state=%d\n", hba->ufshcd_state); - dev_err(hba->dev, "outstanding reqs=0x%lx tasks=0x%lx\n", - hba->outstanding_reqs, hba->outstanding_tasks); + dev_err(hba->dev, "%d outstanding reqs, tasks=0x%lx\n", + scsi_host_busy(hba->host), hba->outstanding_tasks); dev_err(hba->dev, "saved_err=0x%x, saved_uic_err=0x%x\n", hba->saved_err, hba->saved_uic_err); dev_err(hba->dev, "Device power mode=%d, UIC link state=%d\n", @@ -2120,8 +2120,6 @@ static void ufshcd_init_clk_gating(struct ufs_hba *hba) INIT_DELAYED_WORK(&hba->clk_gating.gate_work, ufshcd_gate_work); INIT_WORK(&hba->clk_gating.ungate_work, ufshcd_ungate_work); - spin_lock_init(&hba->clk_gating.lock); - hba->clk_gating.clk_gating_workq = alloc_ordered_workqueue( "ufs_clk_gating_%d", WQ_MEM_RECLAIM | WQ_HIGHPRI, hba->host->host_no); @@ -3106,8 +3104,13 @@ ufshcd_dev_cmd_completion(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) case UPIU_TRANSACTION_QUERY_RSP: { u8 response = lrbp->ucd_rsp_ptr->header.response; - if (response == 0) + if (response == 0) { err = ufshcd_copy_query_response(hba, lrbp); + } else { + err = -EINVAL; + dev_err(hba->dev, "%s: unexpected response in Query RSP: %x\n", + __func__, response); + } break; } case UPIU_TRANSACTION_REJECT_UPIU: @@ -5976,24 +5979,6 @@ out: __func__, err); } -static void ufshcd_temp_exception_event_handler(struct ufs_hba *hba, u16 status) -{ - u32 value; - - if (ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, - QUERY_ATTR_IDN_CASE_ROUGH_TEMP, 0, 0, &value)) - return; - - dev_info(hba->dev, "exception Tcase %d\n", value - 80); - - ufs_hwmon_notify_event(hba, status & MASK_EE_URGENT_TEMP); - - /* - * A placeholder for the platform vendors to add whatever additional - * steps required - */ -} - static int __ufshcd_wb_toggle(struct ufs_hba *hba, bool set, enum flag_idn idn) { u8 index; @@ -6214,7 +6199,7 @@ static void ufshcd_exception_event_handler(struct work_struct *work) ufshcd_bkops_exception_event_handler(hba); if (status & hba->ee_drv_mask & MASK_EE_URGENT_TEMP) - ufshcd_temp_exception_event_handler(hba, status); + ufs_hwmon_notify_event(hba, status & MASK_EE_URGENT_TEMP); ufs_debugfs_exception_event(hba, status); } @@ -8897,7 +8882,7 @@ static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd) dev_info(hba->dev, "%s() finished; outstanding_tasks = %#lx.\n", __func__, hba->outstanding_tasks); - return hba->outstanding_reqs ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE; + return scsi_host_busy(hba->host) ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE; } static const struct attribute_group *ufshcd_driver_groups[] = { @@ -9160,7 +9145,7 @@ out: if (!IS_ERR_OR_NULL(clki->clk) && clki->enabled) clk_disable_unprepare(clki->clk); } - } else if (!ret && on) { + } else if (!ret && on && hba->clk_gating.is_initialized) { scoped_guard(spinlock_irqsave, &hba->clk_gating.lock) hba->clk_gating.state = CLKS_ON; trace_ufshcd_clk_gating(dev_name(hba->dev), @@ -10247,16 +10232,6 @@ EXPORT_SYMBOL_GPL(ufshcd_system_thaw); #endif /* CONFIG_PM_SLEEP */ /** - * ufshcd_dealloc_host - deallocate Host Bus Adapter (HBA) - * @hba: pointer to Host Bus Adapter (HBA) - */ -void ufshcd_dealloc_host(struct ufs_hba *hba) -{ - scsi_host_put(hba->host); -} -EXPORT_SYMBOL_GPL(ufshcd_dealloc_host); - -/** * ufshcd_set_dma_mask - Set dma mask based on the controller * addressing capability * @hba: per adapter instance @@ -10275,11 +10250,25 @@ static int ufshcd_set_dma_mask(struct ufs_hba *hba) } /** + * ufshcd_devres_release - devres cleanup handler, invoked during release of + * hba->dev + * @host: pointer to SCSI host + */ +static void ufshcd_devres_release(void *host) +{ + scsi_host_put(host); +} + +/** * ufshcd_alloc_host - allocate Host Bus Adapter (HBA) * @dev: pointer to device handle * @hba_handle: driver private handle * * Return: 0 on success, non-zero value on failure. + * + * NOTE: There is no corresponding ufshcd_dealloc_host() because this function + * keeps track of its allocations using devres and deallocates everything on + * device removal automatically. */ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) { @@ -10301,6 +10290,13 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) err = -ENOMEM; goto out_error; } + + err = devm_add_action_or_reset(dev, ufshcd_devres_release, + host); + if (err) + return dev_err_probe(dev, err, + "failed to add ufshcd dealloc action\n"); + host->nr_maps = HCTX_TYPE_POLL + 1; hba = shost_priv(host); hba->host = host; @@ -10429,6 +10425,27 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->irq = irq; hba->vps = &ufs_hba_vps; + /* + * Initialize clk_gating.lock early since it is being used in + * ufshcd_setup_clocks() + */ + spin_lock_init(&hba->clk_gating.lock); + + /* + * Set the default power management level for runtime and system PM. + * Host controller drivers can override them in their + * 'ufs_hba_variant_ops::init' callback. + * + * Default power saving mode is to keep UFS link in Hibern8 state + * and UFS device in sleep state. + */ + hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( + UFS_SLEEP_PWR_MODE, + UIC_LINK_HIBERN8_STATE); + hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( + UFS_SLEEP_PWR_MODE, + UIC_LINK_HIBERN8_STATE); + err = ufshcd_hba_init(hba); if (err) goto out_error; @@ -10542,21 +10559,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) goto out_disable; } - /* - * Set the default power management level for runtime and system PM if - * not set by the host controller drivers. - * Default power saving mode is to keep UFS link in Hibern8 state - * and UFS device in sleep state. - */ - if (!hba->rpm_lvl) - hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( - UFS_SLEEP_PWR_MODE, - UIC_LINK_HIBERN8_STATE); - if (!hba->spm_lvl) - hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state( - UFS_SLEEP_PWR_MODE, - UIC_LINK_HIBERN8_STATE); - INIT_DELAYED_WORK(&hba->rpm_dev_flush_recheck_work, ufshcd_rpm_dev_flush_recheck_work); INIT_DELAYED_WORK(&hba->ufs_rtc_update_work, ufshcd_rtc_work); diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index ea39c5d5b8cf..9cfcaad23cf9 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -562,7 +562,6 @@ static void ufshcd_pci_remove(struct pci_dev *pdev) pm_runtime_forbid(&pdev->dev); pm_runtime_get_noresume(&pdev->dev); ufshcd_remove(hba); - ufshcd_dealloc_host(hba); } /** @@ -605,7 +604,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) err = ufshcd_init(hba, mmio_base, pdev->irq); if (err) { dev_err(&pdev->dev, "Initialization failed\n"); - ufshcd_dealloc_host(hba); return err; } diff --git a/drivers/ufs/host/ufshcd-pltfrm.c b/drivers/ufs/host/ufshcd-pltfrm.c index 505572d4fa87..ffe5d1d2b215 100644 --- a/drivers/ufs/host/ufshcd-pltfrm.c +++ b/drivers/ufs/host/ufshcd-pltfrm.c @@ -465,21 +465,17 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, struct device *dev = &pdev->dev; mmio_base = devm_platform_ioremap_resource(pdev, 0); - if (IS_ERR(mmio_base)) { - err = PTR_ERR(mmio_base); - goto out; - } + if (IS_ERR(mmio_base)) + return PTR_ERR(mmio_base); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - err = irq; - goto out; - } + if (irq < 0) + return irq; err = ufshcd_alloc_host(dev, &hba); if (err) { dev_err(dev, "Allocation failed\n"); - goto out; + return err; } hba->vops = vops; @@ -488,13 +484,13 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, if (err) { dev_err(dev, "%s: clock parse failed %d\n", __func__, err); - goto dealloc_host; + return err; } err = ufshcd_parse_regulator_info(hba); if (err) { dev_err(dev, "%s: regulator init failed %d\n", __func__, err); - goto dealloc_host; + return err; } ufshcd_init_lanes_per_dir(hba); @@ -502,25 +498,20 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, err = ufshcd_parse_operating_points(hba); if (err) { dev_err(dev, "%s: OPP parse failed %d\n", __func__, err); - goto dealloc_host; + return err; } err = ufshcd_init(hba, mmio_base, irq); if (err) { dev_err_probe(dev, err, "Initialization failed with error %d\n", err); - goto dealloc_host; + return err; } pm_runtime_set_active(dev); pm_runtime_enable(dev); return 0; - -dealloc_host: - ufshcd_dealloc_host(hba); -out: - return err; } EXPORT_SYMBOL_GPL(ufshcd_pltfrm_init); @@ -534,7 +525,6 @@ void ufshcd_pltfrm_remove(struct platform_device *pdev) pm_runtime_get_sync(&pdev->dev); ufshcd_remove(hba); - ufshcd_dealloc_host(hba); pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); } diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 6b37d1c47fce..c2ecfa3c8349 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -371,7 +371,7 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf) static void acm_ctrl_irq(struct urb *urb) { struct acm *acm = urb->context; - struct usb_cdc_notification *dr = urb->transfer_buffer; + struct usb_cdc_notification *dr; unsigned int current_size = urb->actual_length; unsigned int expected_size, copy_size, alloc_size; int retval; @@ -398,14 +398,25 @@ static void acm_ctrl_irq(struct urb *urb) usb_mark_last_busy(acm->dev); - if (acm->nb_index) + if (acm->nb_index == 0) { + /* + * The first chunk of a message must contain at least the + * notification header with the length field, otherwise we + * can't get an expected_size. + */ + if (current_size < sizeof(struct usb_cdc_notification)) { + dev_dbg(&acm->control->dev, "urb too short\n"); + goto exit; + } + dr = urb->transfer_buffer; + } else { dr = (struct usb_cdc_notification *)acm->notification_buffer; - + } /* size = notification-header + (optional) data */ expected_size = sizeof(struct usb_cdc_notification) + le16_to_cpu(dr->wLength); - if (current_size < expected_size) { + if (acm->nb_index != 0 || current_size < expected_size) { /* notification is transmitted fragmented, reassemble */ if (acm->nb_size < expected_size) { u8 *new_buffer; @@ -1727,13 +1738,16 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, - { USB_DEVICE(0x045b, 0x023c), /* Renesas USB Download mode */ + { USB_DEVICE(0x045b, 0x023c), /* Renesas R-Car H3 USB Download mode */ + .driver_info = DISABLE_ECHO, /* Don't echo banner */ + }, + { USB_DEVICE(0x045b, 0x0247), /* Renesas R-Car D3 USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, - { USB_DEVICE(0x045b, 0x0248), /* Renesas USB Download mode */ + { USB_DEVICE(0x045b, 0x0248), /* Renesas R-Car M3-N USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, - { USB_DEVICE(0x045b, 0x024D), /* Renesas USB Download mode */ + { USB_DEVICE(0x045b, 0x024D), /* Renesas R-Car E3 USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index c3f839637cb5..a76bb50b6202 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1849,6 +1849,17 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) hdev = interface_to_usbdev(intf); /* + * The USB 2.0 spec prohibits hubs from having more than one + * configuration or interface, and we rely on this prohibition. + * Refuse to accept a device that violates it. + */ + if (hdev->descriptor.bNumConfigurations > 1 || + hdev->actconfig->desc.bNumInterfaces > 1) { + dev_err(&intf->dev, "Invalid hub with more than one config or interface\n"); + return -EINVAL; + } + + /* * Set default autosuspend delay as 0 to speedup bus suspend, * based on the below considerations: * @@ -4698,7 +4709,6 @@ void usb_ep0_reinit(struct usb_device *udev) EXPORT_SYMBOL_GPL(usb_ep0_reinit); #define usb_sndaddr0pipe() (PIPE_CONTROL << 30) -#define usb_rcvaddr0pipe() ((PIPE_CONTROL << 30) | USB_DIR_IN) static int hub_set_address(struct usb_device *udev, int devnum) { @@ -4804,7 +4814,7 @@ static int get_bMaxPacketSize0(struct usb_device *udev, for (i = 0; i < GET_MAXPACKET0_TRIES; ++i) { /* Start with invalid values in case the transfer fails */ buf->bDescriptorType = buf->bMaxPacketSize0 = 0; - rc = usb_control_msg(udev, usb_rcvaddr0pipe(), + rc = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, USB_DT_DEVICE << 8, 0, buf, size, diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 67732c791c93..dfcfc142bd5e 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -435,6 +435,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0c45, 0x7056), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Sony Xperia XZ1 Compact (lilac) smartphone in fastboot mode */ + { USB_DEVICE(0x0fce, 0x0dde), .driver_info = USB_QUIRK_NO_LPM }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, @@ -525,6 +528,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Blackmagic Design UltraStudio SDI */ { USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM }, + /* Teclast disk */ + { USB_DEVICE(0x1f75, 0x0917), .driver_info = USB_QUIRK_NO_LPM }, + /* Hauppauge HVR-950q */ { USB_DEVICE(0x2040, 0x7200), .driver_info = USB_QUIRK_CONFIG_INTF_STRINGS }, diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index e7bf9cc635be..bd4c788f03bc 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -4615,6 +4615,7 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget) spin_lock_irqsave(&hsotg->lock, flags); hsotg->driver = NULL; + hsotg->gadget.dev.of_node = NULL; hsotg->gadget.speed = USB_SPEED_UNKNOWN; hsotg->enabled = 0; diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index ac7c730f81ac..c955039bb4f6 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -717,6 +717,7 @@ struct dwc3_event_buffer { /** * struct dwc3_ep - device side endpoint representation * @endpoint: usb endpoint + * @nostream_work: work for handling bulk NoStream * @cancelled_list: list of cancelled requests for this endpoint * @pending_list: list of pending requests for this endpoint * @started_list: list of started requests on this endpoint diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index d27af65eb08a..ddd6b2ce5710 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2629,10 +2629,38 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on) { u32 reg; u32 timeout = 2000; + u32 saved_config = 0; if (pm_runtime_suspended(dwc->dev)) return 0; + /* + * When operating in USB 2.0 speeds (HS/FS), ensure that + * GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY are cleared before starting + * or stopping the controller. This resolves timeout issues that occur + * during frequent role switches between host and device modes. + * + * Save and clear these settings, then restore them after completing the + * controller start or stop sequence. + * + * This solution was discovered through experimentation as it is not + * mentioned in the dwc3 programming guide. It has been tested on an + * Exynos platforms. + */ + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + if (reg & DWC3_GUSB2PHYCFG_SUSPHY) { + saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; + reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; + } + + if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) { + saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM; + reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM; + } + + if (saved_config) + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); + reg = dwc3_readl(dwc->regs, DWC3_DCTL); if (is_on) { if (DWC3_VER_IS_WITHIN(DWC3, ANY, 187A)) { @@ -2660,6 +2688,12 @@ static int dwc3_gadget_run_stop(struct dwc3 *dwc, int is_on) reg &= DWC3_DSTS_DEVCTRLHLT; } while (--timeout && !(!is_on ^ !reg)); + if (saved_config) { + reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); + reg |= saved_config; + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); + } + if (!timeout) return -ETIMEDOUT; diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c index 837fcdfa3840..da82598fcef8 100644 --- a/drivers/usb/gadget/function/f_midi.c +++ b/drivers/usb/gadget/function/f_midi.c @@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req) /* Our transmit completed. See if there's more to go. * f_midi_transmit eats req, don't queue it again. */ req->length = 0; - f_midi_transmit(midi); + queue_work(system_highpri_wq, &midi->work); return; } break; @@ -907,6 +907,15 @@ static int f_midi_bind(struct usb_configuration *c, struct usb_function *f) status = -ENODEV; + /* + * Reset wMaxPacketSize with maximum packet size of FS bulk transfer before + * endpoint claim. This ensures that the wMaxPacketSize does not exceed the + * limit during bind retries where configured dwc3 TX/RX FIFO's maxpacket + * size of 512 bytes for IN/OUT endpoints in support HS speed only. + */ + bulk_in_desc.wMaxPacketSize = cpu_to_le16(64); + bulk_out_desc.wMaxPacketSize = cpu_to_le16(64); + /* allocate instance-specific endpoints */ midi->in_ep = usb_ep_autoconfig(cdev->gadget, &bulk_in_desc); if (!midi->in_ep) @@ -1000,11 +1009,11 @@ static int f_midi_bind(struct usb_configuration *c, struct usb_function *f) } /* configure the endpoint descriptors ... */ - ms_out_desc.bLength = USB_DT_MS_ENDPOINT_SIZE(midi->in_ports); - ms_out_desc.bNumEmbMIDIJack = midi->in_ports; + ms_out_desc.bLength = USB_DT_MS_ENDPOINT_SIZE(midi->out_ports); + ms_out_desc.bNumEmbMIDIJack = midi->out_ports; - ms_in_desc.bLength = USB_DT_MS_ENDPOINT_SIZE(midi->out_ports); - ms_in_desc.bNumEmbMIDIJack = midi->out_ports; + ms_in_desc.bLength = USB_DT_MS_ENDPOINT_SIZE(midi->in_ports); + ms_in_desc.bNumEmbMIDIJack = midi->in_ports; /* ... and add them to the list */ endpoint_descriptor_index = i; diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 79e223713d8b..fb77b0b21790 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -818,7 +818,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc) return -EINVAL; /* Allocate a kthread for asynchronous hw submit handler. */ - video->kworker = kthread_create_worker(0, "UVCG"); + video->kworker = kthread_run_worker(0, "UVCG"); if (IS_ERR(video->kworker)) { uvcg_err(&video->uvc->func, "failed to create UVCG kworker\n"); return PTR_ERR(video->kworker); diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index a6f46364be65..4b3d5075621a 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1543,8 +1543,8 @@ void usb_del_gadget(struct usb_gadget *gadget) kobject_uevent(&udc->dev.kobj, KOBJ_REMOVE); sysfs_remove_link(&udc->dev.kobj, "gadget"); - flush_work(&gadget->work); device_del(&gadget->dev); + flush_work(&gadget->work); ida_free(&gadget_id_numbers, gadget->id_number); cancel_work_sync(&udc->vbus_work); device_unregister(&udc->dev); diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index fce5c41d9f29..89b304cf6d03 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -310,7 +310,7 @@ struct renesas_usb3_request { struct list_head queue; }; -#define USB3_EP_NAME_SIZE 8 +#define USB3_EP_NAME_SIZE 16 struct renesas_usb3_ep { struct usb_ep ep; struct renesas_usb3 *usb3; diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index 1f9c1b1435d8..0404489c2f6a 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -958,6 +958,15 @@ static void quirk_usb_disable_ehci(struct pci_dev *pdev) * booting from USB disk or using a usb keyboard */ hcc_params = readl(base + EHCI_HCC_PARAMS); + + /* LS7A EHCI controller doesn't have extended capabilities, the + * EECP (EHCI Extended Capabilities Pointer) field of HCCPARAMS + * register should be 0x0 but it reads as 0xa0. So clear it to + * avoid error messages on boot. + */ + if (pdev->vendor == PCI_VENDOR_ID_LOONGSON && pdev->device == 0x7a14) + hcc_params &= ~(0xffL << 8); + offset = (hcc_params >> 8) & 0xff; while (offset && --count) { pci_read_config_dword(pdev, offset, &cap); diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 2d1e205c14c6..ad0ff356f6fa 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -653,8 +653,8 @@ put_runtime_pm: } EXPORT_SYMBOL_NS_GPL(xhci_pci_common_probe, "xhci"); -static const struct pci_device_id pci_ids_reject[] = { - /* handled by xhci-pci-renesas */ +/* handled by xhci-pci-renesas if enabled */ +static const struct pci_device_id pci_ids_renesas[] = { { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, 0x0014) }, { PCI_DEVICE(PCI_VENDOR_ID_RENESAS, 0x0015) }, { /* end: all zeroes */ } @@ -662,7 +662,8 @@ static const struct pci_device_id pci_ids_reject[] = { static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { - if (pci_match_id(pci_ids_reject, dev)) + if (IS_ENABLED(CONFIG_USB_XHCI_PCI_RENESAS) && + pci_match_id(pci_ids_renesas, dev)) return -ENODEV; return xhci_pci_common_probe(dev, id); diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c index 6c3ececf9137..8423be59ec0f 100644 --- a/drivers/usb/phy/phy-generic.c +++ b/drivers/usb/phy/phy-generic.c @@ -212,7 +212,7 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop) if (of_property_read_u32(node, "clock-frequency", &clk_rate)) clk_rate = 0; - needs_clk = of_property_read_bool(node, "clocks"); + needs_clk = of_property_present(node, "clocks"); } nop->gpiod_reset = devm_gpiod_get_optional(dev, "reset", GPIOD_ASIS); diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c index c58a12c147f4..30482d4cf826 100644 --- a/drivers/usb/roles/class.c +++ b/drivers/usb/roles/class.c @@ -387,8 +387,11 @@ usb_role_switch_register(struct device *parent, dev_set_name(&sw->dev, "%s-role-switch", desc->name ? desc->name : dev_name(parent)); + sw->registered = true; + ret = device_register(&sw->dev); if (ret) { + sw->registered = false; put_device(&sw->dev); return ERR_PTR(ret); } @@ -399,8 +402,6 @@ usb_role_switch_register(struct device *parent, dev_warn(&sw->dev, "failed to add component\n"); } - sw->registered = true; - /* TODO: Symlinks for the host port and the device controller. */ return sw; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1e2ae0c6c41c..58bd54e8c483 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -619,15 +619,6 @@ static void option_instat_callback(struct urb *urb); /* Luat Air72*U series based on UNISOC UIS8910 uses UNISOC's vendor ID */ #define LUAT_PRODUCT_AIR720U 0x4e00 -/* MeiG Smart Technology products */ -#define MEIGSMART_VENDOR_ID 0x2dee -/* MeiG Smart SRM815/SRM825L based on Qualcomm 315 */ -#define MEIGSMART_PRODUCT_SRM825L 0x4d22 -/* MeiG Smart SLM320 based on UNISOC UIS8910 */ -#define MEIGSMART_PRODUCT_SLM320 0x4d41 -/* MeiG Smart SLM770A based on ASR1803 */ -#define MEIGSMART_PRODUCT_SLM770A 0x4d57 - /* Device flags */ /* Highest interface number which can be used with NCTRL() and RSVD() */ @@ -1367,15 +1358,15 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990 (rmnet) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990A (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990 (MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990A (MBIM) */ .driver_info = NCTRL(0) | RSVD(1) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990 (RNDIS) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990A (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990A (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, - { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990 (PCIe) */ + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1075, 0xff), /* Telit FN990A (PCIe) */ .driver_info = RSVD(0) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1080, 0xff), /* Telit FE990 (rmnet) */ .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, @@ -1403,6 +1394,22 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | NCTRL(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x10c8, 0xff), /* Telit FE910C04 (rmnet) */ .driver_info = RSVD(0) | NCTRL(2) | RSVD(3) | RSVD(4) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x60) }, /* Telit FN990B (rmnet) */ + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x40) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d0, 0x30), + .driver_info = NCTRL(5) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x60) }, /* Telit FN990B (MBIM) */ + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x40) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d1, 0x30), + .driver_info = NCTRL(6) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x60) }, /* Telit FN990B (RNDIS) */ + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x40) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d2, 0x30), + .driver_info = NCTRL(6) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x60) }, /* Telit FN990B (ECM) */ + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x40) }, + { USB_DEVICE_INTERFACE_PROTOCOL(TELIT_VENDOR_ID, 0x10d3, 0x30), + .driver_info = NCTRL(6) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), @@ -2347,6 +2354,14 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a05, 0xff) }, /* Fibocom FM650-CN (NCM mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a06, 0xff) }, /* Fibocom FM650-CN (RNDIS mode) */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0a07, 0xff) }, /* Fibocom FM650-CN (MBIM mode) */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d41, 0xff, 0, 0) }, /* MeiG Smart SLM320 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d57, 0xff, 0, 0) }, /* MeiG Smart SLM770A */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0, 0) }, /* MeiG Smart SRM815 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0x10, 0x02) }, /* MeiG Smart SLM828 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0x10, 0x03) }, /* MeiG Smart SLM828 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0xff, 0x30) }, /* MeiG Smart SRM815 and SRM825L */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0xff, 0x40) }, /* MeiG Smart SRM825L */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2dee, 0x4d22, 0xff, 0xff, 0x60) }, /* MeiG Smart SRM825L */ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ @@ -2403,12 +2418,6 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(SIERRA_VENDOR_ID, SIERRA_PRODUCT_EM9191, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, TOZED_PRODUCT_LT70C, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(UNISOC_VENDOR_ID, LUAT_PRODUCT_AIR720U, 0xff, 0, 0) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM320, 0xff, 0, 0) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SLM770A, 0xff, 0, 0) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0, 0) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x30) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x40) }, - { USB_DEVICE_AND_INTERFACE_INFO(MEIGSMART_VENDOR_ID, MEIGSMART_PRODUCT_SRM825L, 0xff, 0xff, 0x60) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0530, 0xff), /* TCL IK512 MBIM */ .driver_info = NCTRL(1) }, { USB_DEVICE_INTERFACE_CLASS(0x1bbb, 0x0640, 0xff), /* TCL IK512 ECM */ diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 47be450d2be3..6bf1a22c785a 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5591,8 +5591,7 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_auto_vbus_discharge_threshold(port, TYPEC_PWR_MODE_USB, port->pps_data.active, 0); tcpm_set_charge(port, false); - tcpm_set_state(port, hard_reset_state(port), - port->timings.ps_src_off_time); + tcpm_set_state(port, ERROR_RECOVERY, port->timings.ps_src_off_time); break; case PR_SWAP_SNK_SRC_SOURCE_ON: tcpm_enable_auto_vbus_discharge(port, true); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9ac25d08f473..63612faeab72 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -666,7 +666,7 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, worker, name); - if (!vtsk) + if (IS_ERR(vtsk)) goto free_worker; mutex_init(&worker->mutex); diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index a337edcf8faf..1f65795cf5d7 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -74,19 +74,21 @@ static inline phys_addr_t xen_dma_to_phys(struct device *dev, return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr)); } +static inline bool range_requires_alignment(phys_addr_t p, size_t size) +{ + phys_addr_t algn = 1ULL << (get_order(size) + PAGE_SHIFT); + phys_addr_t bus_addr = pfn_to_bfn(XEN_PFN_DOWN(p)) << XEN_PAGE_SHIFT; + + return IS_ALIGNED(p, algn) && !IS_ALIGNED(bus_addr, algn); +} + static inline int range_straddles_page_boundary(phys_addr_t p, size_t size) { unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p); unsigned int i, nr_pages = XEN_PFN_UP(xen_offset_in_page(p) + size); - phys_addr_t algn = 1ULL << (get_order(size) + PAGE_SHIFT); next_bfn = pfn_to_bfn(xen_pfn); - /* If buffer is physically aligned, ensure DMA alignment. */ - if (IS_ALIGNED(p, algn) && - !IS_ALIGNED((phys_addr_t)next_bfn << XEN_PAGE_SHIFT, algn)) - return 1; - for (i = 1; i < nr_pages; i++) if (pfn_to_bfn(++xen_pfn) != ++next_bfn) return 1; @@ -111,7 +113,7 @@ static struct io_tlb_pool *xen_swiotlb_find_pool(struct device *dev, } #ifdef CONFIG_X86 -int xen_swiotlb_fixup(void *buf, unsigned long nslabs) +int __init xen_swiotlb_fixup(void *buf, unsigned long nslabs) { int rc; unsigned int order = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT); @@ -156,7 +158,8 @@ xen_swiotlb_alloc_coherent(struct device *dev, size_t size, *dma_handle = xen_phys_to_dma(dev, phys); if (*dma_handle + size - 1 > dma_mask || - range_straddles_page_boundary(phys, size)) { + range_straddles_page_boundary(phys, size) || + range_requires_alignment(phys, size)) { if (xen_create_contiguous_region(phys, order, fls64(dma_mask), dma_handle) != 0) goto out_free_pages; @@ -182,7 +185,8 @@ xen_swiotlb_free_coherent(struct device *dev, size_t size, void *vaddr, size = ALIGN(size, XEN_PAGE_SIZE); if (WARN_ON_ONCE(dma_handle + size - 1 > dev->coherent_dma_mask) || - WARN_ON_ONCE(range_straddles_page_boundary(phys, size))) + WARN_ON_ONCE(range_straddles_page_boundary(phys, size) || + range_requires_alignment(phys, size))) return; if (TestClearPageXenRemapped(virt_to_page(vaddr))) diff --git a/fs/afs/server.c b/fs/afs/server.c index 038f9d0ae3af..4504e16b458c 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -163,6 +163,8 @@ static struct afs_server *afs_install_server(struct afs_cell *cell, rb_insert_color(&server->uuid_rb, &net->fs_servers); hlist_add_head_rcu(&server->proc_link, &net->fs_proc); + afs_get_cell(cell, afs_cell_trace_get_server); + added_dup: write_seqlock(&net->fs_addr_lock); estate = rcu_dereference_protected(server->endpoint_state, @@ -442,6 +444,7 @@ static void afs_server_rcu(struct rcu_head *rcu) atomic_read(&server->active), afs_server_trace_free); afs_put_endpoint_state(rcu_access_pointer(server->endpoint_state), afs_estate_trace_put_server); + afs_put_cell(server->cell, afs_cell_trace_put_server); kfree(server); } diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 7e7e567a7f8a..d20cd902ef94 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -97,8 +97,8 @@ struct afs_server_list *afs_alloc_server_list(struct afs_volume *volume, break; if (j < slist->nr_servers) { if (slist->servers[j].server == server) { - afs_put_server(volume->cell->net, server, - afs_server_trace_put_slist_isort); + afs_unuse_server(volume->cell->net, server, + afs_server_trace_put_slist_isort); continue; } diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index 85eea7a4dea3..fc7efd0a7525 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -61,6 +61,13 @@ config BCACHEFS_DEBUG The resulting code will be significantly slower than normal; you probably shouldn't select this option unless you're a developer. +config BCACHEFS_INJECT_TRANSACTION_RESTARTS + bool "Randomly inject transaction restarts" + depends on BCACHEFS_DEBUG + help + Randomly inject transaction restarts in a few core paths - may have a + significant performance penalty + config BCACHEFS_TESTS bool "bcachefs unit and performance tests" depends on BCACHEFS_FS diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index fc2ef33b67b3..3ea809990ef1 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -1803,7 +1803,6 @@ struct discard_buckets_state { u64 open; u64 need_journal_commit; u64 discarded; - u64 need_journal_commit_this_dev; }; static int bch2_discard_one_bucket(struct btree_trans *trans, @@ -1827,11 +1826,11 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, goto out; } - if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, - c->journal.flushed_seq_ondisk, - pos.inode, pos.offset)) { - s->need_journal_commit++; - s->need_journal_commit_this_dev++; + u64 seq_ready = bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal, + pos.inode, pos.offset); + if (seq_ready > c->journal.flushed_seq_ondisk) { + if (seq_ready > c->journal.flushing_seq) + s->need_journal_commit++; goto out; } @@ -1865,23 +1864,24 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, discard_locked = true; } - if (!bkey_eq(*discard_pos_done, iter.pos) && - ca->mi.discard && !c->opts.nochanges) { - /* - * This works without any other locks because this is the only - * thread that removes items from the need_discard tree - */ - bch2_trans_unlock_long(trans); - blkdev_issue_discard(ca->disk_sb.bdev, - k.k->p.offset * ca->mi.bucket_size, - ca->mi.bucket_size, - GFP_KERNEL); - *discard_pos_done = iter.pos; + if (!bkey_eq(*discard_pos_done, iter.pos)) { s->discarded++; + *discard_pos_done = iter.pos; - ret = bch2_trans_relock_notrace(trans); - if (ret) - goto out; + if (ca->mi.discard && !c->opts.nochanges) { + /* + * This works without any other locks because this is the only + * thread that removes items from the need_discard tree + */ + bch2_trans_unlock_long(trans); + blkdev_issue_discard(ca->disk_sb.bdev, + k.k->p.offset * ca->mi.bucket_size, + ca->mi.bucket_size, + GFP_KERNEL); + ret = bch2_trans_relock_notrace(trans); + if (ret) + goto out; + } } SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); @@ -1929,6 +1929,9 @@ static void bch2_do_discards_work(struct work_struct *work) POS(ca->dev_idx, U64_MAX), 0, k, bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false))); + if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal)) + bch2_journal_flush_async(&c->journal, NULL); + trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); @@ -2024,7 +2027,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work) break; } - trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); + trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); bch2_trans_put(trans); percpu_ref_put(&ca->io_ref); diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 6df41c331a52..5a781fb4c794 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -205,8 +205,12 @@ static inline bool may_alloc_bucket(struct bch_fs *c, return false; } - if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, - c->journal.flushed_seq_ondisk, bucket.inode, bucket.offset)) { + u64 journal_seq_ready = + bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal, + bucket.inode, bucket.offset); + if (journal_seq_ready > c->journal.flushed_seq_ondisk) { + if (journal_seq_ready > c->journal.flushing_seq) + s->need_journal_commit++; s->skipped_need_journal_commit++; return false; } @@ -570,7 +574,7 @@ alloc: ? bch2_bucket_alloc_freelist(trans, ca, watermark, &s, cl) : bch2_bucket_alloc_early(trans, ca, watermark, &s, cl); - if (s.skipped_need_journal_commit * 2 > avail) + if (s.need_journal_commit * 2 > avail) bch2_journal_flush_async(&c->journal, NULL); if (!ob && s.btree_bitmap != BTREE_BITMAP_ANY) { diff --git a/fs/bcachefs/alloc_types.h b/fs/bcachefs/alloc_types.h index 9bbb28e90b93..4aa8ee026cb8 100644 --- a/fs/bcachefs/alloc_types.h +++ b/fs/bcachefs/alloc_types.h @@ -18,6 +18,7 @@ struct bucket_alloc_state { u64 buckets_seen; u64 skipped_open; u64 skipped_need_journal_commit; + u64 need_journal_commit; u64 skipped_nocow; u64 skipped_nouse; u64 skipped_mi_btree_bitmap; diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index ca755e8d1a37..1ec1f90e0eb3 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -203,7 +203,7 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) return NULL; } - bch2_btree_lock_init(&b->c, 0); + bch2_btree_lock_init(&b->c, 0, GFP_KERNEL); __bch2_btree_node_to_freelist(bc, b); return b; @@ -795,17 +795,18 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea } b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN); - if (!b) { + if (b) { + bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_NOWAIT); + } else { mutex_unlock(&bc->lock); bch2_trans_unlock(trans); b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) goto err; + bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL); mutex_lock(&bc->lock); } - bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0); - BUG_ON(!six_trylock_intent(&b->c.lock)); BUG_ON(!six_trylock_write(&b->c.lock)); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index e371e60e3133..dece27d9db04 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -996,7 +996,7 @@ drop_this_key: } got_good_key: le16_add_cpu(&i->u64s, -next_good_key); - memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k); + memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k); set_btree_node_need_rewrite(b); } fsck_err: diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 5988219c6908..e32fce4fd258 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2357,6 +2357,12 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en bch2_btree_iter_verify_entry_exit(iter); EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX)); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + if (iter->update_path) { bch2_path_put_nokeep(trans, iter->update_path, iter->flags & BTREE_ITER_intent); @@ -2622,6 +2628,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp bch2_btree_iter_verify_entry_exit(iter); EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN)); + int ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + while (1) { k = __bch2_btree_iter_peek_prev(iter, search_key); if (unlikely(!k.k)) @@ -2749,6 +2761,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) bch2_btree_iter_verify_entry_exit(iter); EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache)); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) { + k = bkey_s_c_err(ret); + goto out_no_locked; + } + /* extents can't span inode numbers: */ if ((iter->flags & BTREE_ITER_is_extents) && unlikely(iter->pos.offset == KEY_OFFSET_MAX)) { @@ -3106,6 +3124,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size) WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (ret) + return ERR_PTR(ret); + struct btree_transaction_stats *s = btree_trans_stats(trans); s->max_mem = max(s->max_mem, new_bytes); @@ -3163,7 +3185,8 @@ out_new_mem: if (old_bytes) { trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes); - return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced)); + return ERR_PTR(btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_mem_realloced, _RET_IP_)); } out_change_top: p = trans->mem + trans->mem_top; @@ -3271,6 +3294,14 @@ u32 bch2_trans_begin(struct btree_trans *trans) trans->last_begin_ip = _RET_IP_; +#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS + if (trans->restarted) { + trans->restart_count_this_trans++; + } else { + trans->restart_count_this_trans = 0; + } +#endif + trans_set_locked(trans, false); if (trans->restarted) { diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index b9538e6e6d65..b96157f3dc9c 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -355,6 +355,18 @@ static int btree_trans_restart(struct btree_trans *trans, int err) return btree_trans_restart_ip(trans, err, _THIS_IP_); } +static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip) +{ +#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS + if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) { + trace_and_count(trans->c, trans_restart_injected, trans, ip); + return btree_trans_restart_ip(trans, + BCH_ERR_transaction_restart_fault_inject, ip); + } +#endif + return 0; +} + bool bch2_btree_node_upgrade(struct btree_trans *, struct btree_path *, unsigned); @@ -739,7 +751,7 @@ transaction_restart: \ if (!_ret2) \ bch2_trans_verify_not_restarted(_trans, _restart_count);\ \ - _ret2 ?: trans_was_restarted(_trans, _restart_count); \ + _ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \ }) #define for_each_btree_key_max_continue(_trans, _iter, \ diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index c378b97ebeca..edce59433375 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -156,7 +156,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k } if (ck) { - bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0); + bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL); ck->c.cached = true; goto lock; } @@ -748,7 +748,6 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) rcu_read_unlock(); mutex_lock(&bc->table.mutex); mutex_unlock(&bc->table.mutex); - rcu_read_lock(); continue; } for (i = 0; i < tbl->size; i++) diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c index 10b805a60f52..caef65adeae4 100644 --- a/fs/bcachefs/btree_locking.c +++ b/fs/bcachefs/btree_locking.c @@ -7,9 +7,10 @@ static struct lock_class_key bch2_btree_node_lock_key; void bch2_btree_lock_init(struct btree_bkey_cached_common *b, - enum six_lock_init_flags flags) + enum six_lock_init_flags flags, + gfp_t gfp) { - __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags); + __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags, gfp); lockdep_set_notrack_class(&b->lock); } diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h index b54ef48eb8cc..b33ab7af8440 100644 --- a/fs/bcachefs/btree_locking.h +++ b/fs/bcachefs/btree_locking.h @@ -13,7 +13,7 @@ #include "btree_iter.h" #include "six.h" -void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags); +void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags, gfp_t gfp); void bch2_trans_unlock_noassert(struct btree_trans *); void bch2_trans_unlock_write(struct btree_trans *); diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 2760dd9569ed..c4f524b2ca9a 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -999,6 +999,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) bch2_trans_verify_not_unlocked_or_in_restart(trans); + ret = trans_maybe_inject_restart(trans, _RET_IP_); + if (unlikely(ret)) + goto out_reset; + if (!trans->nr_updates && !trans->journal_entries_u64s) goto out_reset; diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index a6f251eb4164..a09cbe9cd94f 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -509,6 +509,9 @@ struct btree_trans { bool notrace_relock_fail:1; enum bch_errcode restarted:16; u32 restart_count; +#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS + u32 restart_count_this_trans; +#endif u64 last_begin_time; unsigned long last_begin_ip; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index f4aeadbe53c1..e4e7c804625e 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -681,9 +681,11 @@ static void btree_update_nodes_written(struct btree_update *as) b = as->old_nodes[i]; + bch2_trans_begin(trans); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read); seq = b->data ? b->data->keys.seq : 0; six_unlock_read(&b->c.lock); + bch2_trans_unlock_long(trans); if (seq == as->old_nodes_seq[i]) wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner, diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index 7930ffea3075..26d646e1275c 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -278,12 +278,12 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct bt { struct bset_tree *t = bset_tree_last(b); struct btree_node_entry *bne = max(write_block(b), - (void *) btree_bkey_last(b, bset_tree_last(b))); + (void *) btree_bkey_last(b, t)); ssize_t remaining_space = __bch2_btree_u64s_remaining(b, bne->keys.start); if (unlikely(bset_written(b, bset(b, t)))) { - if (remaining_space > (ssize_t) (block_bytes(c) >> 3)) + if (b->written + block_sectors(c) <= btree_sectors(c)) return bne; } else { if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) && diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index f9fb150eda70..c8a488e6b7b8 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -22,23 +22,21 @@ static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_ memset(t->d, 0, sizeof(t->d[0]) << t->bits); } -bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, - u64 flushed_seq, - unsigned dev, u64 bucket) +u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *b, + unsigned dev, u64 bucket) { struct buckets_waiting_for_journal_table *t; u64 dev_bucket = (u64) dev << 56 | bucket; - bool ret = false; - unsigned i; + u64 ret = 0; mutex_lock(&b->lock); t = b->t; - for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { + for (unsigned i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) { struct bucket_hashed *h = bucket_hash(t, i, dev_bucket); if (h->dev_bucket == dev_bucket) { - ret = h->journal_seq > flushed_seq; + ret = h->journal_seq; break; } } diff --git a/fs/bcachefs/buckets_waiting_for_journal.h b/fs/bcachefs/buckets_waiting_for_journal.h index d2ae19cbe18c..365619ca44c8 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.h +++ b/fs/bcachefs/buckets_waiting_for_journal.h @@ -4,8 +4,8 @@ #include "buckets_waiting_for_journal_types.h" -bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *, - u64, unsigned, u64); +u64 bch2_bucket_journal_seq_ready(struct buckets_waiting_for_journal *, + unsigned, u64); int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *, u64, unsigned, u64, u64); diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 337494facac6..642fbc60ecab 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -340,6 +340,7 @@ restart_drop_extra_replicas: struct printbuf buf = PRINTBUF; prt_str(&buf, "about to insert invalid key in data update path"); + prt_printf(&buf, "\nop.nonce: %u", m->op.nonce); prt_str(&buf, "\nold: "); bch2_bkey_val_to_text(&buf, c, old); prt_str(&buf, "\nk: "); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index a633f83c1ac7..362b3b2f2f2e 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -31,11 +31,6 @@ static inline unsigned dirent_val_u64s(unsigned len) sizeof(u64)); } -static inline unsigned int dirent_occupied_size(const struct qstr *name) -{ - return (BKEY_U64s + dirent_val_u64s(name->len)) * sizeof(u64); -} - int bch2_dirent_read_target(struct btree_trans *, subvol_inum, struct bkey_s_c_dirent, subvol_inum *); diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 5360cbb3ec29..f4372cafea2e 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -210,11 +210,13 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem * static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p, u64 *v, unsigned nr) { + percpu_down_read(&c->mark_lock); struct bch_accounting_mem *acc = &c->accounting; unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), accounting_pos_cmp, &p); bch2_accounting_mem_read_counters(acc, idx, v, nr, false); + percpu_up_read(&c->mark_lock); } static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset) diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 620b284aa34f..204d765dd74c 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -704,7 +704,7 @@ static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1, ptr1.unwritten == ptr2.unwritten && ptr1.offset == ptr2.offset && ptr1.dev == ptr2.dev && - ptr1.dev == ptr2.dev); + ptr1.gen == ptr2.gen); } void bch2_ptr_swab(struct bkey_s); diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c index d70d9f634cea..2c3d46ac70c6 100644 --- a/fs/bcachefs/fs-common.c +++ b/fs/bcachefs/fs-common.c @@ -152,7 +152,6 @@ int bch2_create_trans(struct btree_trans *trans, if (is_subdir_for_nlink(new_inode)) dir_u->bi_nlink++; dir_u->bi_mtime = dir_u->bi_ctime = now; - dir_u->bi_size += dirent_occupied_size(name); ret = bch2_inode_write(trans, &dir_iter, dir_u); if (ret) @@ -221,7 +220,6 @@ int bch2_link_trans(struct btree_trans *trans, } dir_u->bi_mtime = dir_u->bi_ctime = now; - dir_u->bi_size += dirent_occupied_size(name); dir_hash = bch2_hash_info_init(c, dir_u); @@ -324,7 +322,6 @@ int bch2_unlink_trans(struct btree_trans *trans, dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now; dir_u->bi_nlink -= is_subdir_for_nlink(inode_u); - dir_u->bi_size -= dirent_occupied_size(name); ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, &dir_hash, &dirent_iter, @@ -463,14 +460,6 @@ int bch2_rename_trans(struct btree_trans *trans, goto err; } - if (mode == BCH_RENAME) { - src_dir_u->bi_size -= dirent_occupied_size(src_name); - dst_dir_u->bi_size += dirent_occupied_size(dst_name); - } - - if (mode == BCH_RENAME_OVERWRITE) - src_dir_u->bi_size -= dirent_occupied_size(src_name); - if (src_inode_u->bi_parent_subvol) src_inode_u->bi_parent_subvol = dst_dir.subvol; diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 94bf34b9b65f..717e7b94c66f 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -466,6 +466,7 @@ int bchfs_truncate(struct mnt_idmap *idmap, ret = bch2_truncate_folio(inode, iattr->ia_size); if (unlikely(ret < 0)) goto err; + ret = 0; truncate_setsize(&inode->v, iattr->ia_size); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 53a421ff136d..0e85131d0af8 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -823,6 +823,7 @@ struct inode_walker_entry { struct bch_inode_unpacked inode; u32 snapshot; u64 count; + u64 i_size; }; struct inode_walker { @@ -910,8 +911,9 @@ found: if (k.k->p.snapshot != i->snapshot && !is_whiteout) { struct inode_walker_entry new = *i; - new.snapshot = k.k->p.snapshot; - new.count = 0; + new.snapshot = k.k->p.snapshot; + new.count = 0; + new.i_size = 0; struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, k); @@ -1116,37 +1118,6 @@ err: return ret; } -static int check_directory_size(struct btree_trans *trans, - struct bch_inode_unpacked *inode_u, - struct bkey_s_c inode_k, bool *write_inode) -{ - struct btree_iter iter; - struct bkey_s_c k; - u64 new_size = 0; - int ret; - - for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents, - SPOS(inode_k.k->p.offset, 0, inode_k.k->p.snapshot), - POS(inode_k.k->p.offset, U64_MAX), - 0, k, ret) { - if (k.k->type != KEY_TYPE_dirent) - continue; - - struct bkey_s_c_dirent dirent = bkey_s_c_to_dirent(k); - struct qstr name = bch2_dirent_get_name(dirent); - - new_size += dirent_occupied_size(&name); - } - bch2_trans_iter_exit(trans, &iter); - - if (!ret && inode_u->bi_size != new_size) { - inode_u->bi_size = new_size; - *write_inode = true; - } - - return ret; -} - static int check_inode(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, @@ -1335,16 +1306,6 @@ static int check_inode(struct btree_trans *trans, u.bi_journal_seq = journal_cur_seq(&c->journal); do_update = true; } - - if (S_ISDIR(u.bi_mode)) { - ret = check_directory_size(trans, &u, k, &do_update); - - fsck_err_on(ret, - trans, directory_size_mismatch, - "directory inode %llu:%u with the mismatch directory size", - u.bi_inum, k.k->p.snapshot); - ret = 0; - } do_update: if (do_update) { ret = __bch2_fsck_write_inode(trans, &u); @@ -2017,7 +1978,7 @@ fsck_err: return ret; } -static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) +static int check_subdir_dirents_count(struct btree_trans *trans, struct inode_walker *w) { u32 restart_count = trans->restart_count; return check_subdir_count_notnested(trans, w) ?: @@ -2367,7 +2328,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, goto out; if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) { - ret = check_subdir_count(trans, dir); + ret = check_subdir_dirents_count(trans, dir); if (ret) goto err; } @@ -2457,9 +2418,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, if (ret) goto err; - if (d.v->d_type == DT_DIR) - for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) + for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) { + if (d.v->d_type == DT_DIR) i->count++; + i->i_size += bkey_bytes(d.k); + } out: err: fsck_err: diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index d2e134528f0e..428b9be6af34 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -285,12 +285,14 @@ void bch2_inode_opts_get(struct bch_io_opts *, struct bch_fs *, struct bch_inode_unpacked *); int bch2_inum_opts_get(struct btree_trans*, subvol_inum, struct bch_io_opts *); +#include "rebalance.h" + static inline struct bch_extent_rebalance bch2_inode_rebalance_opts_get(struct bch_fs *c, struct bch_inode_unpacked *inode) { struct bch_io_opts io_opts; bch2_inode_opts_get(&io_opts, c, inode); - return io_opts_to_rebalance_opts(&io_opts); + return io_opts_to_rebalance_opts(c, &io_opts); } int bch2_inode_rm_snapshot(struct btree_trans *, u64, u32); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index dd508d93e9fc..03892388832b 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -411,6 +411,16 @@ void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op) __bch2_write_op_error(out, op, op->pos.offset); } +static void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out, + struct bch_write_op *op, u64 offset) +{ + bch2_inum_offset_err_msg_trans(trans, out, + (subvol_inum) { op->subvol, op->pos.inode, }, + offset << 9); + prt_printf(out, "write error%s: ", + op->flags & BCH_WRITE_MOVE ? "(internal move)" : ""); +} + void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, enum bch_data_type type, const struct bkey_i *k, @@ -1193,7 +1203,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op) struct bkey_i *insert = bch2_keylist_front(&op->insert_keys); struct printbuf buf = PRINTBUF; - __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k)); + bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k)); prt_printf(&buf, "btree update error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); printbuf_exit(&buf); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index cb2c3722f674..5dabbf3c0965 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -319,6 +319,16 @@ void bch2_journal_halt(struct journal *j) spin_unlock(&j->lock); } +void bch2_journal_halt_locked(struct journal *j) +{ + lockdep_assert_held(&j->lock); + + __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true); + if (!j->err_seq) + j->err_seq = journal_cur_seq(j); + journal_wake(j); +} + static bool journal_entry_want_write(struct journal *j) { bool ret = !journal_entry_is_open(j) || @@ -381,9 +391,12 @@ static int journal_entry_open(struct journal *j) if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf)) return JOURNAL_ERR_max_in_flight; - if (bch2_fs_fatal_err_on(journal_cur_seq(j) >= JOURNAL_SEQ_MAX, - c, "cannot start: journal seq overflow")) + if (journal_cur_seq(j) >= JOURNAL_SEQ_MAX) { + bch_err(c, "cannot start: journal seq overflow"); + if (bch2_fs_emergency_read_only_locked(c)) + bch_err(c, "fatal error - emergency read only"); return JOURNAL_ERR_insufficient_devices; /* -EROFS */ + } BUG_ON(!j->cur_entry_sectors); @@ -783,6 +796,7 @@ recheck_need_open: } buf->must_flush = true; + j->flushing_seq = max(j->flushing_seq, seq); if (parent && !closure_wait(&buf->wait, parent)) BUG(); @@ -1180,7 +1194,9 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca, closure_sync(&cl); - if (ret && ret != -BCH_ERR_bucket_alloc_blocked) + if (ret && + ret != -BCH_ERR_bucket_alloc_blocked && + ret != -BCH_ERR_open_buckets_empty) break; } diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h index dccddd5420ad..107f7f901cd9 100644 --- a/fs/bcachefs/journal.h +++ b/fs/bcachefs/journal.h @@ -409,6 +409,7 @@ bool bch2_journal_noflush_seq(struct journal *, u64, u64); int bch2_journal_meta(struct journal *); void bch2_journal_halt(struct journal *); +void bch2_journal_halt_locked(struct journal *); static inline int bch2_journal_error(struct journal *j) { diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 6a9cefb635d6..d373cd181a7f 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -384,12 +384,16 @@ void bch2_journal_pin_drop(struct journal *j, spin_unlock(&j->lock); } -static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn) +static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin, + journal_pin_flush_fn fn) { if (fn == bch2_btree_node_flush0 || - fn == bch2_btree_node_flush1) - return JOURNAL_PIN_TYPE_btree; - else if (fn == bch2_btree_key_cache_journal_flush) + fn == bch2_btree_node_flush1) { + unsigned idx = fn == bch2_btree_node_flush1; + struct btree *b = container_of(pin, struct btree, writes[idx].journal); + + return JOURNAL_PIN_TYPE_btree0 - b->c.level; + } else if (fn == bch2_btree_key_cache_journal_flush) return JOURNAL_PIN_TYPE_key_cache; else return JOURNAL_PIN_TYPE_other; @@ -441,7 +445,7 @@ void bch2_journal_pin_copy(struct journal *j, bool reclaim = __journal_pin_drop(j, dst); - bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn)); + bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(dst, flush_fn)); if (reclaim) bch2_journal_reclaim_fast(j); @@ -465,7 +469,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq, bool reclaim = __journal_pin_drop(j, pin); - bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn)); + bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn)); if (reclaim) bch2_journal_reclaim_fast(j); @@ -587,7 +591,7 @@ static size_t journal_flush_pins(struct journal *j, spin_lock(&j->lock); /* Pin might have been dropped or rearmed: */ if (likely(!err && !j->flush_in_progress_dropped)) - list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]); + list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]); j->flush_in_progress = NULL; j->flush_in_progress_dropped = false; spin_unlock(&j->lock); @@ -869,18 +873,13 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush, mutex_lock(&j->reclaim_lock); - if (journal_flush_pins_or_still_flushing(j, seq_to_flush, - BIT(JOURNAL_PIN_TYPE_key_cache)| - BIT(JOURNAL_PIN_TYPE_other))) { - *did_work = true; - goto unlock; - } - - if (journal_flush_pins_or_still_flushing(j, seq_to_flush, - BIT(JOURNAL_PIN_TYPE_btree))) { - *did_work = true; - goto unlock; - } + for (int type = JOURNAL_PIN_TYPE_NR - 1; + type >= 0; + --type) + if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) { + *did_work = true; + goto unlock; + } if (seq_to_flush > journal_cur_seq(j)) bch2_journal_entry_close(j); diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h index 3ba433a48eb8..1ef3a28ed6ab 100644 --- a/fs/bcachefs/journal_types.h +++ b/fs/bcachefs/journal_types.h @@ -53,7 +53,10 @@ struct journal_buf { */ enum journal_pin_type { - JOURNAL_PIN_TYPE_btree, + JOURNAL_PIN_TYPE_btree3, + JOURNAL_PIN_TYPE_btree2, + JOURNAL_PIN_TYPE_btree1, + JOURNAL_PIN_TYPE_btree0, JOURNAL_PIN_TYPE_key_cache, JOURNAL_PIN_TYPE_other, JOURNAL_PIN_TYPE_NR, @@ -237,6 +240,7 @@ struct journal { /* seq, last_seq from the most recent journal entry successfully written */ u64 seq_ondisk; u64 flushed_seq_ondisk; + u64 flushing_seq; u64 last_seq_ondisk; u64 err_seq; u64 last_empty_seq; diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index a182b5d454ba..9d397fc2a1f0 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -659,18 +659,4 @@ static inline void bch2_io_opts_fixups(struct bch_io_opts *opts) struct bch_io_opts bch2_opts_to_inode_opts(struct bch_opts); bool bch2_opt_is_inode_opt(enum bch_opt_id); -/* rebalance opts: */ - -static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_io_opts *opts) -{ - return (struct bch_extent_rebalance) { - .type = BIT(BCH_EXTENT_ENTRY_rebalance), -#define x(_name) \ - ._name = opts->_name, \ - ._name##_from_inode = opts->_name##_from_inode, - BCH_REBALANCE_OPTS() -#undef x - }; -}; - #endif /* _BCACHEFS_OPTS_H */ diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index 4adc74cd3f70..d0a1f5cd5c2b 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -121,12 +121,10 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) } } incompressible: - if (opts->background_target && - bch2_target_accepts_data(c, BCH_DATA_user, opts->background_target)) { + if (opts->background_target) bkey_for_each_ptr_decode(k.k, ptrs, p, entry) if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target)) sectors += p.crc.compressed_size; - } return sectors; } @@ -140,7 +138,7 @@ static bool bch2_bkey_rebalance_needs_update(struct bch_fs *c, struct bch_io_opt const struct bch_extent_rebalance *old = bch2_bkey_rebalance_opts(k); if (k.k->type == KEY_TYPE_reflink_v || bch2_bkey_ptrs_need_rebalance(c, opts, k)) { - struct bch_extent_rebalance new = io_opts_to_rebalance_opts(opts); + struct bch_extent_rebalance new = io_opts_to_rebalance_opts(c, opts); return old == NULL || memcmp(old, &new, sizeof(new)); } else { return old != NULL; @@ -163,7 +161,7 @@ int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bch_io_opts *opts, k.k->u64s += sizeof(*old) / sizeof(u64); } - *old = io_opts_to_rebalance_opts(opts); + *old = io_opts_to_rebalance_opts(c, opts); } else { if (old) extent_entry_drop(k, (union bch_extent_entry *) old); diff --git a/fs/bcachefs/rebalance.h b/fs/bcachefs/rebalance.h index 0a0821ab895d..62a3859d3823 100644 --- a/fs/bcachefs/rebalance.h +++ b/fs/bcachefs/rebalance.h @@ -4,8 +4,28 @@ #include "compress.h" #include "disk_groups.h" +#include "opts.h" #include "rebalance_types.h" +static inline struct bch_extent_rebalance io_opts_to_rebalance_opts(struct bch_fs *c, + struct bch_io_opts *opts) +{ + struct bch_extent_rebalance r = { + .type = BIT(BCH_EXTENT_ENTRY_rebalance), +#define x(_name) \ + ._name = opts->_name, \ + ._name##_from_inode = opts->_name##_from_inode, + BCH_REBALANCE_OPTS() +#undef x + }; + + if (r.background_target && + !bch2_target_accepts_data(c, BCH_DATA_user, r.background_target)) + r.background_target = 0; + + return r; +}; + u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bch_io_opts *, struct bkey_i *); int bch2_get_update_rebalance_opts(struct btree_trans *, diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 93ba4f4e47ca..441e648f28b5 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -172,7 +172,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans, bool should_commit) { if (REFLINK_P_ERROR(p.v)) - return -BCH_ERR_missing_indirect_extent; + return 0; struct bch_fs *c = trans->c; u64 live_start = REFLINK_P_IDX(p.v); @@ -259,8 +259,6 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, return k; if (unlikely(!bkey_extent_is_reflink_data(k.k))) { - bch2_trans_iter_exit(trans, iter); - unsigned size = min((u64) k.k->size, REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) - reflink_offset); @@ -268,14 +266,16 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans, int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset, k.k->p.offset, should_commit); - if (ret) + if (ret) { + bch2_trans_iter_exit(trans, iter); return bkey_s_c_err(ret); + } } else if (unlikely(REFLINK_P_ERROR(p.v))) { - bch2_trans_iter_exit(trans, iter); - int ret = bch2_indirect_extent_not_missing(trans, p, should_commit); - if (ret) + if (ret) { + bch2_trans_iter_exit(trans, iter); return bkey_s_c_err(ret); + } } *offset_into_extent = reflink_offset - bkey_start_offset(k.k); @@ -300,7 +300,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans, if (ret) return ret; - if (bkey_deleted(k.k)) { + if (!bkey_refcount_c(k)) { if (!(flags & BTREE_TRIGGER_overwrite)) ret = -BCH_ERR_missing_indirect_extent; goto next; diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 14f6b6a5fb38..051214fdc735 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -90,10 +90,7 @@ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_accounting_mismatch, \ BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ - BCH_FSCK_ERR_accounting_key_junk_at_end) \ - x(directory_size, \ - BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ - BCH_FSCK_ERR_directory_size_mismatch) \ + BCH_FSCK_ERR_accounting_key_junk_at_end) #define DOWNGRADE_TABLE() \ x(bucket_stripe_sectors, \ diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index ea0a18364751..b86ec013d7d7 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -180,9 +180,9 @@ enum bch_fsck_flags { x(ptr_crc_nonce_mismatch, 162, 0) \ x(ptr_stripe_redundant, 163, 0) \ x(reservation_key_nr_replicas_invalid, 164, 0) \ - x(reflink_v_refcount_wrong, 165, 0) \ + x(reflink_v_refcount_wrong, 165, FSCK_AUTOFIX) \ x(reflink_v_pos_bad, 292, 0) \ - x(reflink_p_to_missing_reflink_v, 166, 0) \ + x(reflink_p_to_missing_reflink_v, 166, FSCK_AUTOFIX) \ x(reflink_refcount_underflow, 293, 0) \ x(stripe_pos_bad, 167, 0) \ x(stripe_val_size_bad, 168, 0) \ diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c index 7e7c66a1e1a6..7c403427fbdb 100644 --- a/fs/bcachefs/six.c +++ b/fs/bcachefs/six.c @@ -850,7 +850,8 @@ void six_lock_exit(struct six_lock *lock) EXPORT_SYMBOL_GPL(six_lock_exit); void __six_lock_init(struct six_lock *lock, const char *name, - struct lock_class_key *key, enum six_lock_init_flags flags) + struct lock_class_key *key, enum six_lock_init_flags flags, + gfp_t gfp) { atomic_set(&lock->state, 0); raw_spin_lock_init(&lock->wait_lock); @@ -873,7 +874,7 @@ void __six_lock_init(struct six_lock *lock, const char *name, * failure if they wish by checking lock->readers, but generally * will not want to treat it as an error. */ - lock->readers = alloc_percpu(unsigned); + lock->readers = alloc_percpu_gfp(unsigned, gfp); } #endif } diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h index c142e06b7a3a..59b851cf8bac 100644 --- a/fs/bcachefs/six.h +++ b/fs/bcachefs/six.h @@ -164,18 +164,19 @@ enum six_lock_init_flags { }; void __six_lock_init(struct six_lock *lock, const char *name, - struct lock_class_key *key, enum six_lock_init_flags flags); + struct lock_class_key *key, enum six_lock_init_flags flags, + gfp_t gfp); /** * six_lock_init - initialize a six lock * @lock: lock to initialize * @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU */ -#define six_lock_init(lock, flags) \ +#define six_lock_init(lock, flags, gfp) \ do { \ static struct lock_class_key __key; \ \ - __six_lock_init((lock), #lock, &__key, flags); \ + __six_lock_init((lock), #lock, &__key, flags, gfp); \ } while (0) /** diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index e3d0475232e5..b7b96283c316 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -428,7 +428,7 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) bch2_bkey_get_iter_typed(trans, &snapshot_iter, BTREE_ID_snapshots, POS(0, snapid), 0, snapshot); - ret = bkey_err(subvol); + ret = bkey_err(snapshot); bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, "missing snapshot %u", snapid); if (ret) @@ -440,6 +440,11 @@ static int __bch2_subvolume_delete(struct btree_trans *trans, u32 subvolid) bch2_bkey_get_iter_typed(trans, &snapshot_tree_iter, BTREE_ID_snapshot_trees, POS(0, treeid), 0, snapshot_tree); + ret = bkey_err(snapshot_tree); + bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + "missing snapshot tree %u", treeid); + if (ret) + goto err; if (le32_to_cpu(snapshot_tree.v->master_subvol) == subvolid) { struct bkey_i_snapshot_tree *snapshot_tree_mut = diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index d97ea7bd1171..6d97d412fed9 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -411,6 +411,17 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c) return ret; } +bool bch2_fs_emergency_read_only_locked(struct bch_fs *c) +{ + bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags); + + bch2_journal_halt_locked(&c->journal); + bch2_fs_read_only_async(c); + + wake_up(&bch2_read_only_wait); + return ret; +} + static int bch2_fs_read_write_late(struct bch_fs *c) { int ret; diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h index fa6d52216510..04f8287eff5c 100644 --- a/fs/bcachefs/super.h +++ b/fs/bcachefs/super.h @@ -29,6 +29,7 @@ int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64); struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *); bool bch2_fs_emergency_read_only(struct bch_fs *); +bool bch2_fs_emergency_read_only_locked(struct bch_fs *); void bch2_fs_read_only(struct bch_fs *); int bch2_fs_read_write(struct bch_fs *); diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 56a5a7fbc0fd..c1b51009edf6 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -727,7 +727,7 @@ DEFINE_EVENT(fs_str, bucket_alloc_fail, TP_ARGS(c, str) ); -TRACE_EVENT(discard_buckets, +DECLARE_EVENT_CLASS(discard_buckets_class, TP_PROTO(struct bch_fs *c, u64 seen, u64 open, u64 need_journal_commit, u64 discarded, const char *err), TP_ARGS(c, seen, open, need_journal_commit, discarded, err), @@ -759,6 +759,18 @@ TRACE_EVENT(discard_buckets, __entry->err) ); +DEFINE_EVENT(discard_buckets_class, discard_buckets, + TP_PROTO(struct bch_fs *c, u64 seen, u64 open, + u64 need_journal_commit, u64 discarded, const char *err), + TP_ARGS(c, seen, open, need_journal_commit, discarded, err) +); + +DEFINE_EVENT(discard_buckets_class, discard_buckets_fast, + TP_PROTO(struct bch_fs *c, u64 seen, u64 open, + u64 need_journal_commit, u64 discarded, const char *err), + TP_ARGS(c, seen, open, need_journal_commit, discarded, err) +); + TRACE_EVENT(bucket_invalidate, TP_PROTO(struct bch_fs *c, unsigned dev, u64 bucket, u32 sectors), TP_ARGS(c, dev, bucket, sectors), diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 92071ca0655f..3dc5a35dd19b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1496,6 +1496,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (!p->skip_locking) { btrfs_unlock_up_safe(p, parent_level + 1); + btrfs_maybe_reset_lockdep_class(root, tmp); tmp_locked = true; btrfs_tree_read_lock(tmp); btrfs_release_path(p); @@ -1539,6 +1540,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (!p->skip_locking) { ASSERT(ret == -EAGAIN); + btrfs_maybe_reset_lockdep_class(root, tmp); tmp_locked = true; btrfs_tree_read_lock(tmp); btrfs_release_path(p); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a2cac9d0a1a9..b2fae67f8fa3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -523,8 +523,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio) u64 end; u32 len; - /* For now only order 0 folios are supported for data. */ - ASSERT(folio_order(folio) == 0); btrfs_debug(fs_info, "%s: bi_sector=%llu, err=%d, mirror=%u", __func__, bio->bi_iter.bi_sector, bio->bi_status, @@ -552,7 +550,6 @@ static void end_bbio_data_read(struct btrfs_bio *bbio) if (likely(uptodate)) { loff_t i_size = i_size_read(inode); - pgoff_t end_index = i_size >> folio_shift(folio); /* * Zero out the remaining part if this range straddles @@ -561,9 +558,11 @@ static void end_bbio_data_read(struct btrfs_bio *bbio) * Here we should only zero the range inside the folio, * not touch anything else. * - * NOTE: i_size is exclusive while end is inclusive. + * NOTE: i_size is exclusive while end is inclusive and + * folio_contains() takes PAGE_SIZE units. */ - if (folio_index(folio) == end_index && i_size <= end) { + if (folio_contains(folio, i_size >> PAGE_SHIFT) && + i_size <= end) { u32 zero_start = max(offset_in_folio(folio, i_size), offset_in_folio(folio, start)); u32 zero_len = offset_in_folio(folio, end) + 1 - @@ -899,7 +898,6 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode, u64 len, struct extent_map **em_cached) { struct extent_map *em; - struct extent_state *cached_state = NULL; ASSERT(em_cached); @@ -915,14 +913,12 @@ static struct extent_map *get_extent_map(struct btrfs_inode *inode, *em_cached = NULL; } - btrfs_lock_and_flush_ordered_range(inode, start, start + len - 1, &cached_state); em = btrfs_get_extent(inode, folio, start, len); if (!IS_ERR(em)) { BUG_ON(*em_cached); refcount_inc(&em->refs); *em_cached = em; } - unlock_extent(&inode->io_tree, start, start + len - 1, &cached_state); return em; } @@ -956,7 +952,7 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, return ret; } - if (folio->index == last_byte >> folio_shift(folio)) { + if (folio_contains(folio, last_byte >> PAGE_SHIFT)) { size_t zero_offset = offset_in_folio(folio, last_byte); if (zero_offset) { @@ -1079,11 +1075,18 @@ static int btrfs_do_readpage(struct folio *folio, struct extent_map **em_cached, int btrfs_read_folio(struct file *file, struct folio *folio) { + struct btrfs_inode *inode = folio_to_inode(folio); + const u64 start = folio_pos(folio); + const u64 end = start + folio_size(folio) - 1; + struct extent_state *cached_state = NULL; struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ }; struct extent_map *em_cached = NULL; int ret; + btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state); ret = btrfs_do_readpage(folio, &em_cached, &bio_ctrl, NULL); + unlock_extent(&inode->io_tree, start, end, &cached_state); + free_extent_map(em_cached); /* @@ -2380,12 +2383,20 @@ void btrfs_readahead(struct readahead_control *rac) { struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD }; struct folio *folio; + struct btrfs_inode *inode = BTRFS_I(rac->mapping->host); + const u64 start = readahead_pos(rac); + const u64 end = start + readahead_length(rac) - 1; + struct extent_state *cached_state = NULL; struct extent_map *em_cached = NULL; u64 prev_em_start = (u64)-1; + btrfs_lock_and_flush_ordered_range(inode, start, end, &cached_state); + while ((folio = readahead_folio(rac)) != NULL) btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start); + unlock_extent(&inode->io_tree, start, end, &cached_state); + if (em_cached) free_extent_map(em_cached); submit_one_bio(&bio_ctrl); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 67ce85ff0ae2..7f46abbd6311 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -1128,6 +1128,8 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c long nr_dropped = 0; struct rb_node *node; + lockdep_assert_held_write(&tree->lock); + /* * Take the mmap lock so that we serialize with the inode logging phase * of fsync because we may need to set the full sync flag on the inode, @@ -1139,28 +1141,12 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c * to find new extents, which may not be there yet because ordered * extents haven't completed yet. * - * We also do a try lock because otherwise we could deadlock. This is - * because the shrinker for this filesystem may be invoked while we are - * in a path that is holding the mmap lock in write mode. For example in - * a reflink operation while COWing an extent buffer, when allocating - * pages for a new extent buffer and under memory pressure, the shrinker - * may be invoked, and therefore we would deadlock by attempting to read - * lock the mmap lock while we are holding already a write lock on it. + * We also do a try lock because we don't want to block for too long and + * we are holding the extent map tree's lock in write mode. */ if (!down_read_trylock(&inode->i_mmap_lock)) return 0; - /* - * We want to be fast so if the lock is busy we don't want to spend time - * waiting for it - either some task is about to do IO for the inode or - * we may have another task shrinking extent maps, here in this code, so - * skip this inode. - */ - if (!write_trylock(&tree->lock)) { - up_read(&inode->i_mmap_lock); - return 0; - } - node = rb_first(&tree->root); while (node) { struct rb_node *next = rb_next(node); @@ -1201,12 +1187,61 @@ next: break; node = next; } - write_unlock(&tree->lock); up_read(&inode->i_mmap_lock); return nr_dropped; } +static struct btrfs_inode *find_first_inode_to_shrink(struct btrfs_root *root, + u64 min_ino) +{ + struct btrfs_inode *inode; + unsigned long from = min_ino; + + xa_lock(&root->inodes); + while (true) { + struct extent_map_tree *tree; + + inode = xa_find(&root->inodes, &from, ULONG_MAX, XA_PRESENT); + if (!inode) + break; + + tree = &inode->extent_tree; + + /* + * We want to be fast so if the lock is busy we don't want to + * spend time waiting for it (some task is about to do IO for + * the inode). + */ + if (!write_trylock(&tree->lock)) + goto next; + + /* + * Skip inode if it doesn't have loaded extent maps, so we avoid + * getting a reference and doing an iput later. This includes + * cases like files that were opened for things like stat(2), or + * files with all extent maps previously released through the + * release folio callback (btrfs_release_folio()) or released in + * a previous run, or directories which never have extent maps. + */ + if (RB_EMPTY_ROOT(&tree->root)) { + write_unlock(&tree->lock); + goto next; + } + + if (igrab(&inode->vfs_inode)) + break; + + write_unlock(&tree->lock); +next: + from = btrfs_ino(inode) + 1; + cond_resched_lock(&root->inodes.xa_lock); + } + xa_unlock(&root->inodes); + + return inode; +} + static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx) { struct btrfs_fs_info *fs_info = root->fs_info; @@ -1214,21 +1249,21 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx long nr_dropped = 0; u64 min_ino = fs_info->em_shrinker_last_ino + 1; - inode = btrfs_find_first_inode(root, min_ino); + inode = find_first_inode_to_shrink(root, min_ino); while (inode) { nr_dropped += btrfs_scan_inode(inode, ctx); + write_unlock(&inode->extent_tree.lock); min_ino = btrfs_ino(inode) + 1; fs_info->em_shrinker_last_ino = btrfs_ino(inode); - btrfs_add_delayed_iput(inode); + iput(&inode->vfs_inode); - if (ctx->scanned >= ctx->nr_to_scan || - btrfs_fs_closing(inode->root->fs_info)) + if (ctx->scanned >= ctx->nr_to_scan || btrfs_fs_closing(fs_info)) break; cond_resched(); - inode = btrfs_find_first_inode(root, min_ino); + inode = find_first_inode_to_shrink(root, min_ino); } if (inode) { diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 36f51c311bb1..0b568c8d24cb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1039,7 +1039,6 @@ int btrfs_write_check(struct kiocb *iocb, size_t count) loff_t pos = iocb->ki_pos; int ret; loff_t oldsize; - loff_t start_pos; /* * Quickly bail out on NOWAIT writes if we don't have the nodatacow or @@ -1066,9 +1065,8 @@ int btrfs_write_check(struct kiocb *iocb, size_t count) inode_inc_iversion(inode); } - start_pos = round_down(pos, fs_info->sectorsize); oldsize = i_size_read(inode); - if (start_pos > oldsize) { + if (pos > oldsize) { /* Expand hole size to cover write data, preventing empty gap */ loff_t end_pos = round_up(pos + count, fs_info->sectorsize); @@ -1092,7 +1090,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i) u64 lockend; size_t num_written = 0; ssize_t ret; - loff_t old_isize = i_size_read(inode); + loff_t old_isize; unsigned int ilock_flags = 0; const bool nowait = (iocb->ki_flags & IOCB_NOWAIT); unsigned int bdp_flags = (nowait ? BDP_ASYNC : 0); @@ -1105,6 +1103,13 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i) if (ret < 0) return ret; + /* + * We can only trust the isize with inode lock held, or it can race with + * other buffered writes and cause incorrect call of + * pagecache_isize_extended() to overwrite existing data. + */ + old_isize = i_size_read(inode); + ret = generic_write_checks(iocb, i); if (ret <= 0) goto out; diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 30eceaf829a7..4aca7475fd82 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1229,6 +1229,18 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( */ if (WARN_ON_ONCE(len >= ordered->num_bytes)) return ERR_PTR(-EINVAL); + /* + * If our ordered extent had an error there's no point in continuing. + * The error may have come from a transaction abort done either by this + * task or some other concurrent task, and the transaction abort path + * iterates over all existing ordered extents and sets the flag + * BTRFS_ORDERED_IOERR on them. + */ + if (unlikely(flags & (1U << BTRFS_ORDERED_IOERR))) { + const int fs_error = BTRFS_FS_ERROR(fs_info); + + return fs_error ? ERR_PTR(fs_error) : ERR_PTR(-EIO); + } /* We cannot split partially completed ordered extents. */ if (ordered->bytes_left) { ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS)); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b90fabe302e6..f9d3766c809b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1880,11 +1880,7 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su * Commit current transaction to make sure all the rfer/excl numbers * get updated. */ - trans = btrfs_start_transaction(fs_info->quota_root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - ret = btrfs_commit_transaction(trans); + ret = btrfs_commit_current_transaction(fs_info->quota_root); if (ret < 0) return ret; @@ -1897,8 +1893,11 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su /* * It's squota and the subvolume still has numbers needed for future * accounting, in this case we can not delete it. Just skip it. + * + * Or the qgroup is already removed by a qgroup rescan. For both cases we're + * safe to ignore them. */ - if (ret == -EBUSY) + if (ret == -EBUSY || ret == -ENOENT) ret = 0; return ret; } diff --git a/fs/btrfs/tests/delayed-refs-tests.c b/fs/btrfs/tests/delayed-refs-tests.c index 6558508c2ddf..265370e79a54 100644 --- a/fs/btrfs/tests/delayed-refs-tests.c +++ b/fs/btrfs/tests/delayed-refs-tests.c @@ -1009,6 +1009,7 @@ int btrfs_test_delayed_refs(u32 sectorsize, u32 nodesize) if (!ret) ret = select_delayed_refs_test(&trans); + kfree(transaction); out_free_fs_info: btrfs_free_dummy_fs_info(fs_info); return ret; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 15312013f2a3..aca83a98b75a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -274,8 +274,10 @@ loop: cur_trans = fs_info->running_transaction; if (cur_trans) { if (TRANS_ABORTED(cur_trans)) { + const int abort_error = cur_trans->aborted; + spin_unlock(&fs_info->trans_lock); - return cur_trans->aborted; + return abort_error; } if (btrfs_blocked_trans_types[cur_trans->state] & type) { spin_unlock(&fs_info->trans_lock); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0a0776489055..fb22d4425cb0 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7200,8 +7200,12 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info, fs_devices = find_fsid(fsid, NULL); if (!fs_devices) { - if (!btrfs_test_opt(fs_info, DEGRADED)) + if (!btrfs_test_opt(fs_info, DEGRADED)) { + btrfs_err(fs_info, + "failed to find fsid %pU when attempting to open seed devices", + fsid); return ERR_PTR(-ENOENT); + } fs_devices = alloc_fs_devices(fsid); if (IS_ERR(fs_devices)) diff --git a/fs/dcache.c b/fs/dcache.c index 9cc0d47da321..e3634916ffb9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1700,7 +1700,7 @@ static struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) smp_store_release(&dentry->d_name.name, dname); /* ^^^ */ dentry->d_flags = 0; - lockref_init(&dentry->d_lockref, 1); + lockref_init(&dentry->d_lockref); seqcount_spinlock_init(&dentry->d_seq, &dentry->d_lock); dentry->d_inode = NULL; dentry->d_parent = dentry; @@ -2966,11 +2966,11 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: - if (alias->d_op->d_unalias_trylock && + if (alias->d_op && alias->d_op->d_unalias_trylock && !alias->d_op->d_unalias_trylock(alias)) goto out_err; __d_move(alias, dentry, false); - if (alias->d_op->d_unalias_unlock) + if (alias->d_op && alias->d_op->d_unalias_unlock) alias->d_op->d_unalias_unlock(alias); ret = 0; out_err: diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index cb1b6d0c3454..c294a8fc566d 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -57,10 +57,11 @@ static ssize_t efivarfs_file_write(struct file *file, if (bytes == -ENOENT) { /* - * zero size signals to release that the write deleted - * the variable + * FIXME: temporary workaround for fwupdate, signal + * failed write with a 1 to keep created but not + * written files */ - i_size_write(inode, 0); + i_size_write(inode, 1); } else { i_size_write(inode, datasize + sizeof(attributes)); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); @@ -124,7 +125,8 @@ static int efivarfs_file_release(struct inode *inode, struct file *file) struct efivar_entry *var = inode->i_private; inode_lock(inode); - var->removed = (--var->open_count == 0 && i_size_read(inode) == 0); + /* FIXME: temporary work around for fwupdate */ + var->removed = (--var->open_count == 0 && i_size_read(inode) == 1); inode_unlock(inode); if (var->removed) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 09fcf731e65d..6eae8cf655c1 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -367,6 +367,8 @@ static int efivarfs_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; + register_pm_notifier(&sfi->pm_nb); + return efivar_init(efivarfs_callback, sb, true); } @@ -552,7 +554,6 @@ static int efivarfs_init_fs_context(struct fs_context *fc) sfi->pm_nb.notifier_call = efivarfs_pm_notify; sfi->pm_nb.priority = 0; - register_pm_notifier(&sfi->pm_nb); return 0; } diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 29f8963bb523..d771e06db738 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -726,7 +726,7 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe) if (IS_ERR(pcl)) return PTR_ERR(pcl); - lockref_init(&pcl->lockref, 1); /* one ref for this request */ + lockref_init(&pcl->lockref); /* one ref for this request */ pcl->algorithmformat = map->m_algorithmformat; pcl->length = 0; pcl->partial = true; diff --git a/fs/file_table.c b/fs/file_table.c index f0291a66f9db..5c00dc38558d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -194,6 +194,11 @@ static int init_file(struct file *f, int flags, const struct cred *cred) * refcount bumps we should reinitialize the reused file first. */ file_ref_init(&f->f_ref, 1); + /* + * Disable permission and pre-content events for all files by default. + * They may be enabled later by file_set_fsnotify_mode_from_watchers(). + */ + file_set_fsnotify_mode(f, FMODE_NONOTIFY_PERM); return 0; } @@ -375,7 +380,13 @@ struct file *alloc_file_pseudo(struct inode *inode, struct vfsmount *mnt, if (IS_ERR(file)) { ihold(inode); path_put(&path); + return file; } + /* + * Disable all fsnotify events for pseudo files by default. + * They may be enabled by caller with file_set_fsnotify_mode(). + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY); return file; } EXPORT_SYMBOL(alloc_file_pseudo); @@ -400,6 +411,11 @@ struct file *alloc_file_pseudo_noaccount(struct inode *inode, return file; } file_init_path(file, &path, fops); + /* + * Disable all fsnotify events for pseudo files by default. + * They may be enabled by caller with file_set_fsnotify_mode(). + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY); return file; } EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 5b5f789b37eb..2b2d1b755544 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -838,6 +838,12 @@ static int fuse_check_folio(struct folio *folio) return 0; } +/* + * Attempt to steal a page from the splice() pipe and move it into the + * pagecache. If successful, the pointer in @pagep will be updated. The + * folio that was originally in @pagep will lose a reference and the new + * folio returned in @pagep will carry a reference. + */ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) { int err; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 198862b086ff..3805f9b06c9d 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1636,7 +1636,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, goto out_err; if (fc->cache_symlinks) - return page_get_link(dentry, inode, callback); + return page_get_link_raw(dentry, inode, callback); err = -ECHILD; if (!dentry) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7d92a5479998..d63e56fd3dd2 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -955,8 +955,10 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, fuse_invalidate_atime(inode); } - for (i = 0; i < ap->num_folios; i++) + for (i = 0; i < ap->num_folios; i++) { folio_end_read(ap->folios[i], !err); + folio_put(ap->folios[i]); + } if (ia->ff) fuse_file_put(ia->ff, false); @@ -1048,7 +1050,14 @@ static void fuse_readahead(struct readahead_control *rac) ap = &ia->ap; while (ap->num_folios < cur_pages) { - folio = readahead_folio(rac); + /* + * This returns a folio with a ref held on it. + * The ref needs to be held until the request is + * completed, since the splice case (see + * fuse_try_move_page()) drops the ref after it's + * replaced in the page cache. + */ + folio = __readahead_folio(rac); ap->folios[ap->num_folios] = folio; ap->descs[ap->num_folios].length = folio_size(folio); ap->num_folios++; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8c4c1f871a88..65c07aa95718 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1201,8 +1201,8 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, if (glops->go_instantiate) gl->gl_flags |= BIT(GLF_INSTANTIATE_NEEDED); gl->gl_name = name; + lockref_init(&gl->gl_lockref); lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass); - gl->gl_lockref.count = 1; gl->gl_state = LM_ST_UNLOCKED; gl->gl_target = LM_ST_UNLOCKED; gl->gl_demote_state = LM_ST_EXCLUSIVE; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 04cadc02e5a6..0727f60ad028 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -51,7 +51,6 @@ static void gfs2_init_glock_once(void *foo) { struct gfs2_glock *gl = foo; - spin_lock_init(&gl->gl_lockref.lock); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_lru); INIT_LIST_HEAD(&gl->gl_ail_list); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 58bc5013ca49..2298e06797ac 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -236,7 +236,7 @@ static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, str return NULL; qd->qd_sbd = sdp; - lockref_init(&qd->qd_lockref, 0); + lockref_init(&qd->qd_lockref); qd->qd_id = qid; qd->qd_slot = -1; INIT_LIST_HEAD(&qd->qd_lru); @@ -297,7 +297,6 @@ static int qd_get(struct gfs2_sbd *sdp, struct kqid qid, spin_lock_bucket(hash); *qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid); if (qd == NULL) { - new_qd->qd_lockref.count++; *qdp = new_qd; list_add(&new_qd->qd_list, &sdp->sd_quota_list); hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]); @@ -1450,6 +1449,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) if (qd == NULL) goto fail_brelse; + qd->qd_lockref.count = 0; set_bit(QDF_CHANGE, &qd->qd_flags); qd->qd_change = qc_change; qd->qd_slot = slot; diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index b521eb15759e..0e47da82b0c2 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -427,12 +427,10 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter, bio_put(bio); goto zero_tail; } - if (dio->flags & IOMAP_DIO_WRITE) { + if (dio->flags & IOMAP_DIO_WRITE) task_io_account_write(n); - } else { - if (dio->flags & IOMAP_DIO_DIRTY) - bio_set_pages_dirty(bio); - } + else if (dio->flags & IOMAP_DIO_DIRTY) + bio_set_pages_dirty(bio); dio->size += n; copied += n; diff --git a/fs/namei.c b/fs/namei.c index 3ab9440c5b93..ecb7b95c2ca3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -5356,10 +5356,9 @@ const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done) EXPORT_SYMBOL(vfs_get_link); /* get the link contents into pagecache */ -const char *page_get_link(struct dentry *dentry, struct inode *inode, - struct delayed_call *callback) +static char *__page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) { - char *kaddr; struct page *page; struct address_space *mapping = inode->i_mapping; @@ -5378,8 +5377,23 @@ const char *page_get_link(struct dentry *dentry, struct inode *inode, } set_delayed_call(callback, page_put_link, page); BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); - kaddr = page_address(page); - nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); + return page_address(page); +} + +const char *page_get_link_raw(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + return __page_get_link(dentry, inode, callback); +} +EXPORT_SYMBOL_GPL(page_get_link_raw); + +const char *page_get_link(struct dentry *dentry, struct inode *inode, + struct delayed_call *callback) +{ + char *kaddr = __page_get_link(dentry, inode, callback); + + if (!IS_ERR(kaddr)) + nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); return kaddr; } diff --git a/fs/namespace.c b/fs/namespace.c index a3ed3f2980cb..8f1000f9f3df 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5087,30 +5087,29 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq) { struct vfsmount *mnt = s->mnt; struct super_block *sb = mnt->mnt_sb; + size_t start = seq->count; int err; - if (sb->s_op->show_options) { - size_t start = seq->count; - - err = security_sb_show_options(seq, sb); - if (err) - return err; + err = security_sb_show_options(seq, sb); + if (err) + return err; + if (sb->s_op->show_options) { err = sb->s_op->show_options(seq, mnt->mnt_root); if (err) return err; + } - if (unlikely(seq_has_overflowed(seq))) - return -EAGAIN; + if (unlikely(seq_has_overflowed(seq))) + return -EAGAIN; - if (seq->count == start) - return 0; + if (seq->count == start) + return 0; - /* skip leading comma */ - memmove(seq->buf + start, seq->buf + start + 1, - seq->count - start - 1); - seq->count--; - } + /* skip leading comma */ + memmove(seq->buf + start, seq->buf + start + 1, + seq->count - start - 1); + seq->count--; return 0; } @@ -5191,39 +5190,45 @@ static int statmount_string(struct kstatmount *s, u64 flag) size_t kbufsize; struct seq_file *seq = &s->seq; struct statmount *sm = &s->sm; - u32 start = seq->count; + u32 start, *offp; + + /* Reserve an empty string at the beginning for any unset offsets */ + if (!seq->count) + seq_putc(seq, 0); + + start = seq->count; switch (flag) { case STATMOUNT_FS_TYPE: - sm->fs_type = start; + offp = &sm->fs_type; ret = statmount_fs_type(s, seq); break; case STATMOUNT_MNT_ROOT: - sm->mnt_root = start; + offp = &sm->mnt_root; ret = statmount_mnt_root(s, seq); break; case STATMOUNT_MNT_POINT: - sm->mnt_point = start; + offp = &sm->mnt_point; ret = statmount_mnt_point(s, seq); break; case STATMOUNT_MNT_OPTS: - sm->mnt_opts = start; + offp = &sm->mnt_opts; ret = statmount_mnt_opts(s, seq); break; case STATMOUNT_OPT_ARRAY: - sm->opt_array = start; + offp = &sm->opt_array; ret = statmount_opt_array(s, seq); break; case STATMOUNT_OPT_SEC_ARRAY: - sm->opt_sec_array = start; + offp = &sm->opt_sec_array; ret = statmount_opt_sec_array(s, seq); break; case STATMOUNT_FS_SUBTYPE: - sm->fs_subtype = start; + offp = &sm->fs_subtype; statmount_fs_subtype(s, seq); break; case STATMOUNT_SB_SOURCE: - sm->sb_source = start; + offp = &sm->sb_source; ret = statmount_sb_source(s, seq); break; default: @@ -5251,6 +5256,7 @@ static int statmount_string(struct kstatmount *s, u64 flag) seq->buf[seq->count++] = '\0'; sm->mask |= flag; + *offp = start; return 0; } diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index f761d44b3436..0d1b6d35ff3b 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -155,8 +155,9 @@ static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq, netfs_cache_read_terminated, subreq); } -static void netfs_issue_read(struct netfs_io_request *rreq, - struct netfs_io_subrequest *subreq) +static void netfs_queue_read(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq, + bool last_subreq) { struct netfs_io_stream *stream = &rreq->io_streams[0]; @@ -177,8 +178,17 @@ static void netfs_issue_read(struct netfs_io_request *rreq, } } + if (last_subreq) { + smp_wmb(); /* Write lists before ALL_QUEUED. */ + set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); + } + spin_unlock(&rreq->lock); +} +static void netfs_issue_read(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq) +{ switch (subreq->source) { case NETFS_DOWNLOAD_FROM_SERVER: rreq->netfs_ops->issue_read(subreq); @@ -293,11 +303,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq) } size -= slice; start += slice; - if (size <= 0) { - smp_wmb(); /* Write lists before ALL_QUEUED. */ - set_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags); - } + netfs_queue_read(rreq, subreq, size <= 0); netfs_issue_read(rreq, subreq); cond_resched(); } while (size > 0); diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index eb76f98c894b..1c4f953c3d68 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -135,6 +135,8 @@ extern atomic_t netfs_n_rh_write_begin; extern atomic_t netfs_n_rh_write_done; extern atomic_t netfs_n_rh_write_failed; extern atomic_t netfs_n_rh_write_zskip; +extern atomic_t netfs_n_rh_retry_read_req; +extern atomic_t netfs_n_rh_retry_read_subreq; extern atomic_t netfs_n_wh_buffered_write; extern atomic_t netfs_n_wh_writethrough; extern atomic_t netfs_n_wh_dio_write; @@ -147,6 +149,8 @@ extern atomic_t netfs_n_wh_upload_failed; extern atomic_t netfs_n_wh_write; extern atomic_t netfs_n_wh_write_done; extern atomic_t netfs_n_wh_write_failed; +extern atomic_t netfs_n_wh_retry_write_req; +extern atomic_t netfs_n_wh_retry_write_subreq; extern atomic_t netfs_n_wb_lock_skip; extern atomic_t netfs_n_wb_lock_wait; extern atomic_t netfs_n_folioq; diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index f65affa5a9e4..636cc5a98ef5 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -470,7 +470,8 @@ void netfs_read_collection_worker(struct work_struct *work) */ void netfs_wake_read_collector(struct netfs_io_request *rreq) { - if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) { + if (test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags) && + !test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) { if (!work_pending(&rreq->work)) { netfs_get_request(rreq, netfs_rreq_trace_get_work); if (!queue_work(system_unbound_wq, &rreq->work)) @@ -586,7 +587,8 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq) smp_mb__after_atomic(); /* Clear IN_PROGRESS before task state */ /* If we are at the head of the queue, wake up the collector. */ - if (list_is_first(&subreq->rreq_link, &stream->subrequests)) + if (list_is_first(&subreq->rreq_link, &stream->subrequests) || + test_bit(NETFS_RREQ_RETRYING, &rreq->flags)) netfs_wake_read_collector(rreq); netfs_put_subrequest(subreq, true, netfs_sreq_trace_put_terminated); diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c index 2290af0d51ac..0f294b26e08c 100644 --- a/fs/netfs/read_retry.c +++ b/fs/netfs/read_retry.c @@ -14,7 +14,7 @@ static void netfs_reissue_read(struct netfs_io_request *rreq, { __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); - netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); + netfs_stat(&netfs_n_rh_retry_read_subreq); subreq->rreq->netfs_ops->issue_read(subreq); } @@ -48,6 +48,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); subreq->retry_count++; netfs_reset_iter(subreq); + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); netfs_reissue_read(rreq, subreq); } } @@ -75,7 +76,7 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) struct iov_iter source; unsigned long long start, len; size_t part; - bool boundary = false; + bool boundary = false, subreq_superfluous = false; /* Go through the subreqs and find the next span of contiguous * buffer that we then rejig (cifs, for example, needs the @@ -116,8 +117,10 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) /* Work through the sublist. */ subreq = from; list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) { - if (!len) + if (!len) { + subreq_superfluous = true; break; + } subreq->source = NETFS_DOWNLOAD_FROM_SERVER; subreq->start = start - subreq->transferred; subreq->len = len + subreq->transferred; @@ -154,19 +157,21 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); netfs_reissue_read(rreq, subreq); - if (subreq == to) + if (subreq == to) { + subreq_superfluous = false; break; + } } /* If we managed to use fewer subreqs, we can discard the * excess; if we used the same number, then we're done. */ if (!len) { - if (subreq == to) + if (!subreq_superfluous) continue; list_for_each_entry_safe_from(subreq, tmp, &stream->subrequests, rreq_link) { - trace_netfs_sreq(subreq, netfs_sreq_trace_discard); + trace_netfs_sreq(subreq, netfs_sreq_trace_superfluous); list_del(&subreq->rreq_link); netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); if (subreq == to) @@ -187,14 +192,12 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) subreq->source = NETFS_DOWNLOAD_FROM_SERVER; subreq->start = start; subreq->len = len; - subreq->debug_index = atomic_inc_return(&rreq->subreq_counter); subreq->stream_nr = stream->stream_nr; subreq->retry_count = 1; trace_netfs_sreq_ref(rreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), netfs_sreq_trace_new); - netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); list_add(&subreq->rreq_link, &to->rreq_link); to = list_next_entry(to, rreq_link); @@ -256,14 +259,34 @@ void netfs_retry_reads(struct netfs_io_request *rreq) { struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream = &rreq->io_streams[0]; + DEFINE_WAIT(myself); + + netfs_stat(&netfs_n_rh_retry_read_req); + + set_bit(NETFS_RREQ_RETRYING, &rreq->flags); /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. */ list_for_each_entry(subreq, &stream->subrequests, rreq_link) { - wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS, - TASK_UNINTERRUPTIBLE); + if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + continue; + + trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue); + for (;;) { + prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE); + + if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags)) + break; + + trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for); + schedule(); + trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue); + } + + finish_wait(&rreq->waitq, &myself); } + clear_bit(NETFS_RREQ_RETRYING, &rreq->flags); trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit); netfs_retry_read_subrequests(rreq); diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c index f1af344266cc..ab6b916addc4 100644 --- a/fs/netfs/stats.c +++ b/fs/netfs/stats.c @@ -29,6 +29,8 @@ atomic_t netfs_n_rh_write_begin; atomic_t netfs_n_rh_write_done; atomic_t netfs_n_rh_write_failed; atomic_t netfs_n_rh_write_zskip; +atomic_t netfs_n_rh_retry_read_req; +atomic_t netfs_n_rh_retry_read_subreq; atomic_t netfs_n_wh_buffered_write; atomic_t netfs_n_wh_writethrough; atomic_t netfs_n_wh_dio_write; @@ -41,6 +43,8 @@ atomic_t netfs_n_wh_upload_failed; atomic_t netfs_n_wh_write; atomic_t netfs_n_wh_write_done; atomic_t netfs_n_wh_write_failed; +atomic_t netfs_n_wh_retry_write_req; +atomic_t netfs_n_wh_retry_write_subreq; atomic_t netfs_n_wb_lock_skip; atomic_t netfs_n_wb_lock_wait; atomic_t netfs_n_folioq; @@ -81,6 +85,11 @@ int netfs_stats_show(struct seq_file *m, void *v) atomic_read(&netfs_n_wh_write), atomic_read(&netfs_n_wh_write_done), atomic_read(&netfs_n_wh_write_failed)); + seq_printf(m, "Retries: rq=%u rs=%u wq=%u ws=%u\n", + atomic_read(&netfs_n_rh_retry_read_req), + atomic_read(&netfs_n_rh_retry_read_subreq), + atomic_read(&netfs_n_wh_retry_write_req), + atomic_read(&netfs_n_wh_retry_write_subreq)); seq_printf(m, "Objs : rr=%u sr=%u foq=%u wsc=%u\n", atomic_read(&netfs_n_rh_rreq), atomic_read(&netfs_n_rh_sreq), diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 69727411683e..77279fc5b5a7 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -253,6 +253,7 @@ void netfs_reissue_write(struct netfs_io_stream *stream, subreq->retry_count++; __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags); __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); + netfs_stat(&netfs_n_wh_retry_write_subreq); netfs_do_issue_write(stream, subreq); } diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c index c841a851dd73..545d33079a77 100644 --- a/fs/netfs/write_retry.c +++ b/fs/netfs/write_retry.c @@ -203,6 +203,8 @@ void netfs_retry_writes(struct netfs_io_request *wreq) struct netfs_io_stream *stream; int s; + netfs_stat(&netfs_n_wh_retry_write_req); + /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. */ diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 035ba52742a5..4db912f56230 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -781,6 +781,43 @@ int nfs4_inode_return_delegation(struct inode *inode) } /** + * nfs4_inode_set_return_delegation_on_close - asynchronously return a delegation + * @inode: inode to process + * + * This routine is called to request that the delegation be returned as soon + * as the file is closed. If the file is already closed, the delegation is + * immediately returned. + */ +void nfs4_inode_set_return_delegation_on_close(struct inode *inode) +{ + struct nfs_delegation *delegation; + struct nfs_delegation *ret = NULL; + + if (!inode) + return; + rcu_read_lock(); + delegation = nfs4_get_valid_delegation(inode); + if (!delegation) + goto out; + spin_lock(&delegation->lock); + if (!delegation->inode) + goto out_unlock; + if (list_empty(&NFS_I(inode)->open_files) && + !test_and_set_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) { + /* Refcount matched in nfs_end_delegation_return() */ + ret = nfs_get_delegation(delegation); + } else + set_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags); +out_unlock: + spin_unlock(&delegation->lock); + if (ret) + nfs_clear_verifier_delegated(inode); +out: + rcu_read_unlock(); + nfs_end_delegation_return(inode, ret, 0); +} + +/** * nfs4_inode_return_delegation_on_close - asynchronously return a delegation * @inode: inode to process * diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 71524d34ed20..8ff5ab9c5c25 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -49,6 +49,7 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred, unsigned long pagemod_limit, u32 deleg_type); int nfs4_inode_return_delegation(struct inode *inode); void nfs4_inode_return_delegation_on_close(struct inode *inode); +void nfs4_inode_set_return_delegation_on_close(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); void nfs_inode_evict_delegation(struct inode *inode); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f45beea92d03..f32f8d7c9122 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -56,6 +56,7 @@ #include <linux/uaccess.h> #include <linux/atomic.h> +#include "delegation.h" #include "internal.h" #include "iostat.h" #include "pnfs.h" @@ -130,6 +131,20 @@ static void nfs_direct_truncate_request(struct nfs_direct_req *dreq, dreq->count = req_start; } +static void nfs_direct_file_adjust_size_locked(struct inode *inode, + loff_t offset, size_t count) +{ + loff_t newsize = offset + (loff_t)count; + loff_t oldsize = i_size_read(inode); + + if (newsize > oldsize) { + i_size_write(inode, newsize); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE; + trace_nfs_size_grow(inode, newsize); + nfs_inc_stats(inode, NFSIOS_EXTENDWRITE); + } +} + /** * nfs_swap_rw - NFS address space operation for swap I/O * @iocb: target I/O control block @@ -272,6 +287,8 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) nfs_direct_count_bytes(dreq, hdr); spin_unlock(&dreq->lock); + nfs_update_delegated_atime(dreq->inode); + while (!list_empty(&hdr->pages)) { struct nfs_page *req = nfs_list_entry(hdr->pages.next); struct page *page = req->wb_page; @@ -741,6 +758,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) struct nfs_direct_req *dreq = hdr->dreq; struct nfs_commit_info cinfo; struct nfs_page *req = nfs_list_entry(hdr->pages.next); + struct inode *inode = dreq->inode; int flags = NFS_ODIRECT_DONE; trace_nfs_direct_write_completion(dreq); @@ -762,6 +780,11 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) } spin_unlock(&dreq->lock); + spin_lock(&inode->i_lock); + nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count); + nfs_update_delegated_mtime_locked(dreq->inode); + spin_unlock(&inode->i_lock); + while (!list_empty(&hdr->pages)) { req = nfs_list_entry(hdr->pages.next); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index df9669d4ded7..6e95db6c17e9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -133,6 +133,7 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, if (err) return NULL; + label->lsmid = shim.id; label->label = shim.context; label->len = shim.len; return label; @@ -145,7 +146,7 @@ nfs4_label_release_security(struct nfs4_label *label) if (label) { shim.context = label->label; shim.len = label->len; - shim.id = LSM_ID_UNDEF; + shim.id = label->lsmid; security_release_secctx(&shim); } } @@ -3906,8 +3907,11 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) { + struct dentry *dentry = ctx->dentry; if (ctx->state == NULL) return; + if (dentry->d_flags & DCACHE_NFSFS_RENAMED) + nfs4_inode_set_return_delegation_on_close(d_inode(dentry)); if (is_sync) nfs4_close_sync(ctx->state, _nfs4_ctx_to_openmode(ctx)); else @@ -6269,7 +6273,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, size_t buflen) { struct nfs_server *server = NFS_SERVER(inode); - struct nfs4_label label = {0, 0, buflen, buf}; + struct nfs4_label label = {0, 0, 0, buflen, buf}; u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; struct nfs_fattr fattr = { @@ -6374,7 +6378,7 @@ static int nfs4_do_set_security_label(struct inode *inode, static int nfs4_set_security_label(struct inode *inode, const void *buf, size_t buflen) { - struct nfs4_label ilabel = {0, 0, buflen, (char *)buf }; + struct nfs4_label ilabel = {0, 0, 0, buflen, (char *)buf }; struct nfs_fattr *fattr; int status; diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 0e552d873eaa..fb9b1656a287 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -446,11 +446,20 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose) struct nfsd_file, nf_gc); struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id); struct nfsd_fcache_disposal *l = nn->fcache_disposal; + struct svc_serv *serv; spin_lock(&l->lock); list_move_tail(&nf->nf_gc, &l->freeme); spin_unlock(&l->lock); - svc_wake_up(nn->nfsd_serv); + + /* + * The filecache laundrette is shut down after the + * nn->nfsd_serv pointer is cleared, but before the + * svc_serv is freed. + */ + serv = nn->nfsd_serv; + if (serv) + svc_wake_up(serv); } } diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 4e3be7201b1c..5fb202acb0fd 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -84,6 +84,8 @@ out: fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); + resp->acl_access = NULL; + resp->acl_default = NULL; goto out; } diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index 5e34e98db969..7b5433bd3019 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -76,6 +76,8 @@ out: fail: posix_acl_release(resp->acl_access); posix_acl_release(resp->acl_default); + resp->acl_access = NULL; + resp->acl_default = NULL; goto out; } diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 50e468bdb8d4..484077200c5d 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -679,7 +679,7 @@ static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, return status; status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status); - if (unlikely(status || cb->cb_seq_status)) + if (unlikely(status || cb->cb_status)) return status; if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0) return -NFSERR_BAD_XDR; @@ -1583,8 +1583,11 @@ nfsd4_run_cb_work(struct work_struct *work) nfsd4_process_cb_update(cb); clnt = clp->cl_cb_client; - if (!clnt) { - /* Callback channel broken, or client killed; give up: */ + if (!clnt || clp->cl_state == NFSD4_COURTESY) { + /* + * Callback channel broken, client killed or + * nfs4_client in courtesy state; give up. + */ nfsd41_destroy_cb(cb); return; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b7a0cfd05401..153eeea2c7c9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4459,10 +4459,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } } while (slot && --cnt > 0); } + +out: seq->maxslots = max(session->se_target_maxslots, seq->maxslots); seq->target_maxslots = session->se_target_maxslots; -out: switch (clp->cl_cb_state) { case NFSD4_CB_DOWN: seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 32019751a41e..aef474f1b84b 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -380,8 +380,9 @@ __fh_verify(struct svc_rqst *rqstp, error = check_nfsd_access(exp, rqstp, may_bypass_gss); if (error) goto out; - - svc_xprt_set_valid(rqstp->rq_xprt); + /* During LOCALIO call to fh_verify will be called with a NULL rqstp */ + if (rqstp) + svc_xprt_set_valid(rqstp->rq_xprt); /* Finally, check access permissions. */ error = nfsd_permission(cred, exp, dentry, access); diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 8ee495a58d0a..fae1b6d397ea 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -648,7 +648,7 @@ EXPORT_SYMBOL_GPL(fsnotify); * Later, fsnotify permission hooks do not check if there are permission event * watches, but that there were permission event watches at open time. */ -void file_set_fsnotify_mode(struct file *file) +void file_set_fsnotify_mode_from_watchers(struct file *file) { struct dentry *dentry = file->f_path.dentry, *parent; struct super_block *sb = dentry->d_sb; @@ -665,7 +665,7 @@ void file_set_fsnotify_mode(struct file *file) */ if (likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_CONTENT))) { - file->f_mode |= FMODE_NONOTIFY_PERM; + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); return; } @@ -676,7 +676,7 @@ void file_set_fsnotify_mode(struct file *file) if ((!d_is_dir(dentry) && !d_is_reg(dentry)) || likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_PRE_CONTENT))) { - file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; + file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); return; } @@ -686,19 +686,25 @@ void file_set_fsnotify_mode(struct file *file) */ mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask, - FSNOTIFY_PRE_CONTENT_EVENTS))) + FSNOTIFY_PRE_CONTENT_EVENTS))) { + /* Enable pre-content events */ + file_set_fsnotify_mode(file, 0); return; + } /* Is parent watching for pre-content events on this file? */ if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { parent = dget_parent(dentry); p_mask = fsnotify_inode_watches_children(d_inode(parent)); dput(parent); - if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) + if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) { + /* Enable pre-content events */ + file_set_fsnotify_mode(file, 0); return; + } } /* Nobody watching for pre-content events from this file */ - file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; + file_set_fsnotify_mode(file, FMODE_NONOTIFY | FMODE_NONOTIFY_PERM); } #endif diff --git a/fs/nsfs.c b/fs/nsfs.c index 663f8656158d..f7fddf8ecf73 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -37,7 +37,6 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) } const struct dentry_operations ns_dentry_operations = { - .d_delete = always_delete_dentry, .d_dname = ns_dname, .d_prune = stashed_dentry_prune, }; diff --git a/fs/open.c b/fs/open.c index 932e5a6de63b..1be20de9f283 100644 --- a/fs/open.c +++ b/fs/open.c @@ -905,7 +905,8 @@ static int do_dentry_open(struct file *f, f->f_sb_err = file_sample_sb_err(f); if (unlikely(f->f_flags & O_PATH)) { - f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY; + f->f_mode = FMODE_PATH | FMODE_OPENED; + file_set_fsnotify_mode(f, FMODE_NONOTIFY); f->f_op = &empty_fops; return 0; } @@ -935,10 +936,10 @@ static int do_dentry_open(struct file *f, /* * Set FMODE_NONOTIFY_* bits according to existing permission watches. - * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't - * change anything. + * If FMODE_NONOTIFY mode was already set for an fanotify fd or for a + * pseudo file, this call will not change the mode. */ - file_set_fsnotify_mode(f); + file_set_fsnotify_mode_from_watchers(f); error = fsnotify_open_perm(f); if (error) goto cleanup_all; @@ -1122,7 +1123,7 @@ struct file *dentry_open_nonotify(const struct path *path, int flags, if (!IS_ERR(f)) { int error; - f->f_mode |= FMODE_NONOTIFY; + file_set_fsnotify_mode(f, FMODE_NONOTIFY); error = vfs_open(path, f); if (error) { fput(f); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 0c28e5fa3407..d7310fcf3888 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -618,7 +618,6 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) err = PTR_ERR(upper); if (!IS_ERR(upper)) { err = ovl_do_link(ofs, ovl_dentry_upper(c->dentry), udir, upper); - dput(upper); if (!err) { /* Restore timestamps on parent (best effort) */ @@ -626,6 +625,7 @@ static int ovl_link_up(struct ovl_copy_up_ctx *c) ovl_dentry_set_upper_alias(c->dentry); ovl_dentry_update_reval(c->dentry, upper); } + dput(upper); } inode_unlock(udir); if (err) diff --git a/fs/pidfs.c b/fs/pidfs.c index 049352f973de..c0478b3c55d9 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -287,7 +287,6 @@ static bool pidfs_ioctl_valid(unsigned int cmd) switch (cmd) { case FS_IOC_GETVERSION: case PIDFD_GET_CGROUP_NAMESPACE: - case PIDFD_GET_INFO: case PIDFD_GET_IPC_NAMESPACE: case PIDFD_GET_MNT_NAMESPACE: case PIDFD_GET_NET_NAMESPACE: @@ -300,6 +299,17 @@ static bool pidfs_ioctl_valid(unsigned int cmd) return true; } + /* Extensible ioctls require some more careful checks. */ + switch (_IOC_NR(cmd)) { + case _IOC_NR(PIDFD_GET_INFO): + /* + * Try to prevent performing a pidfd ioctl when someone + * erronously mistook the file descriptor for a pidfd. + * This is not perfect but will catch most cases. + */ + return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO)); + } + return false; } @@ -511,7 +521,6 @@ static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) } const struct dentry_operations pidfs_dentry_operations = { - .d_delete = always_delete_dentry, .d_dname = pidfs_dname, .d_prune = stashed_dentry_prune, }; diff --git a/fs/pipe.c b/fs/pipe.c index 94b59045ab44..ce1af7592780 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -960,6 +960,12 @@ int create_pipe_files(struct file **res, int flags) res[1] = f; stream_open(inode, res[0]); stream_open(inode, res[1]); + /* + * Disable permission and pre-content events, but enable legacy + * inotify events for legacy users. + */ + file_set_fsnotify_mode(res[0], FMODE_NONOTIFY_PERM); + file_set_fsnotify_mode(res[1], FMODE_NONOTIFY_PERM); return 0; } diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index a00120a3c099..10d01eb09c43 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1524,7 +1524,7 @@ int vmcore_add_device_dump(struct vmcoredd_data *data) pr_warn_once("Unexpected adding of device dump\n"); if (vmcore_open) { ret = -EBUSY; - goto out_err; + goto unlock; } list_add_tail(&dump->list, &vmcoredd_list); @@ -1532,6 +1532,9 @@ int vmcore_add_device_dump(struct vmcoredd_data *data) mutex_unlock(&vmcore_mutex); return 0; +unlock: + mutex_unlock(&vmcore_mutex); + out_err: vfree(buf); vfree(dump); diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index a68434ad744a..cddeb2adbf4a 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -253,6 +253,7 @@ struct cifs_cred { struct cifs_open_info_data { bool adjust_tz; bool reparse_point; + bool contains_posix_file_info; struct { /* ioctl response buffer */ struct { @@ -357,7 +358,7 @@ struct smb_version_operations { int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache); + __u16 epoch, bool *purge_cache); /* process transaction2 response */ bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, char *, int); @@ -552,12 +553,12 @@ struct smb_version_operations { /* if we can do cache read operations */ bool (*is_read_op)(__u32); /* set oplock level for the inode */ - void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int, - bool *); + void (*set_oplock_level)(struct cifsInodeInfo *cinode, __u32 oplock, __u16 epoch, + bool *purge_cache); /* create lease context buffer for CREATE request */ char * (*create_lease_buf)(u8 *lease_key, u8 oplock); /* parse lease context buffer and return oplock/epoch info */ - __u8 (*parse_lease_buf)(void *buf, unsigned int *epoch, char *lkey); + __u8 (*parse_lease_buf)(void *buf, __u16 *epoch, char *lkey); ssize_t (*copychunk_range)(const unsigned int, struct cifsFileInfo *src_file, struct cifsFileInfo *target_file, @@ -1447,7 +1448,7 @@ struct cifs_fid { __u8 create_guid[16]; __u32 access; struct cifs_pending_open *pending_open; - unsigned int epoch; + __u16 epoch; #ifdef CONFIG_CIFS_DEBUG2 __u64 mid; #endif /* CIFS_DEBUG2 */ @@ -1480,7 +1481,7 @@ struct cifsFileInfo { bool oplock_break_cancelled:1; bool status_file_deleted:1; /* file has been deleted */ bool offload:1; /* offload final part of _put to a wq */ - unsigned int oplock_epoch; /* epoch from the lease break */ + __u16 oplock_epoch; /* epoch from the lease break */ __u32 oplock_level; /* oplock/lease level from the lease break */ int count; spinlock_t file_info_lock; /* protects four flag/count fields above */ @@ -1508,7 +1509,6 @@ struct cifs_io_parms { struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; - struct TCP_Server_Info *server; pid_t pid; }; @@ -1577,7 +1577,7 @@ struct cifsInodeInfo { spinlock_t open_file_lock; /* protects openFileList */ __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */ unsigned int oplock; /* oplock/lease level we have */ - unsigned int epoch; /* used to track lease state changes */ + __u16 epoch; /* used to track lease state changes */ #define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */ #define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */ #define CIFS_INODE_FLAG_UNUSED (2) /* Unused flag */ @@ -2325,8 +2325,8 @@ struct smb2_compound_vars { struct kvec io_iov[SMB2_IOCTL_IOV_SIZE]; struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE]; struct kvec close_iov; - struct smb2_file_rename_info rename_info; - struct smb2_file_link_info link_info; + struct smb2_file_rename_info_hdr rename_info; + struct smb2_file_link_info_hdr link_info; struct kvec ea_iov; }; diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index 3feaa0f68169..d07682020c64 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1338,7 +1338,8 @@ cifs_readv_callback(struct mid_q_entry *mid) rdata->credits.value = 0; rdata->subreq.error = rdata->result; rdata->subreq.transferred += rdata->got_bytes; - queue_work(cifsiod_wq, &rdata->subreq.work); + trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_progress); + netfs_read_subreq_terminated(&rdata->subreq); release_mid(mid); add_credits(server, &credits, 0); } diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c index dad521336b5e..f65a8a90ba27 100644 --- a/fs/smb/client/dfs.c +++ b/fs/smb/client/dfs.c @@ -150,25 +150,27 @@ again: if (rc) continue; - if (tgt.flags & DFSREF_STORAGE_SERVER) { - rc = cifs_mount_get_tcon(mnt_ctx); - if (!rc) - rc = cifs_is_path_remote(mnt_ctx); + rc = cifs_mount_get_tcon(mnt_ctx); + if (rc) { + if (tgt.server_type == DFS_TYPE_LINK && + DFS_INTERLINK(tgt.flags)) + rc = -EREMOTE; + } else { + rc = cifs_is_path_remote(mnt_ctx); if (!rc) { ref_walk_set_tgt_hint(rw); break; } - if (rc != -EREMOTE) - continue; } - - rc = ref_walk_advance(rw); - if (!rc) { - rc = setup_dfs_ref(&tgt, rw); - if (rc) - break; - ref_walk_mark_end(rw); - goto again; + if (rc == -EREMOTE) { + rc = ref_walk_advance(rw); + if (!rc) { + rc = setup_dfs_ref(&tgt, rw); + if (rc) + break; + ref_walk_mark_end(rw); + goto again; + } } } } while (rc && ref_walk_descend(rw)); diff --git a/fs/smb/client/dfs.h b/fs/smb/client/dfs.h index ed4cd7cf1ec6..e60f0a24a8a1 100644 --- a/fs/smb/client/dfs.h +++ b/fs/smb/client/dfs.h @@ -188,4 +188,11 @@ static inline void dfs_put_root_smb_sessions(struct list_head *head) } } +static inline const char *dfs_ses_refpath(struct cifs_ses *ses) +{ + const char *path = ses->server->leaf_fullpath; + + return path ? path + 1 : ERR_PTR(-ENOENT); +} + #endif /* _CIFS_DFS_H */ diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c index 5022bb1f122a..4dada26d56b5 100644 --- a/fs/smb/client/dfs_cache.c +++ b/fs/smb/client/dfs_cache.c @@ -1136,33 +1136,19 @@ static bool is_ses_good(struct cifs_ses *ses) return ret; } -static char *get_ses_refpath(struct cifs_ses *ses) -{ - struct TCP_Server_Info *server = ses->server; - char *path = ERR_PTR(-ENOENT); - - if (server->leaf_fullpath) { - path = kstrdup(server->leaf_fullpath + 1, GFP_KERNEL); - if (!path) - path = ERR_PTR(-ENOMEM); - } - return path; -} - /* Refresh dfs referral of @ses */ static void refresh_ses_referral(struct cifs_ses *ses) { struct cache_entry *ce; unsigned int xid; - char *path; + const char *path; int rc = 0; xid = get_xid(); - path = get_ses_refpath(ses); + path = dfs_ses_refpath(ses); if (IS_ERR(path)) { rc = PTR_ERR(path); - path = NULL; goto out; } @@ -1181,7 +1167,6 @@ static void refresh_ses_referral(struct cifs_ses *ses) out: free_xid(xid); - kfree(path); } static int __refresh_tcon_referral(struct cifs_tcon *tcon, @@ -1231,19 +1216,18 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh) struct dfs_info3_param *refs = NULL; struct cache_entry *ce; struct cifs_ses *ses; - unsigned int xid; bool needs_refresh; - char *path; + const char *path; + unsigned int xid; int numrefs = 0; int rc = 0; xid = get_xid(); ses = tcon->ses; - path = get_ses_refpath(ses); + path = dfs_ses_refpath(ses); if (IS_ERR(path)) { rc = PTR_ERR(path); - path = NULL; goto out; } @@ -1271,7 +1255,6 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh) out: free_xid(xid); - kfree(path); free_dfs_info_array(refs, numrefs); } diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 79de2f2f9c41..8582cf61242c 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -147,7 +147,7 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) struct netfs_io_request *rreq = subreq->rreq; struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); - struct TCP_Server_Info *server = req->server; + struct TCP_Server_Info *server; struct cifs_sb_info *cifs_sb = CIFS_SB(rreq->inode->i_sb); size_t size; int rc = 0; @@ -156,6 +156,8 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) rdata->xid = get_xid(); rdata->have_xid = true; } + + server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); rdata->server = server; if (cifs_sb->ctx->rsize == 0) @@ -198,7 +200,7 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq) struct netfs_io_request *rreq = subreq->rreq; struct cifs_io_subrequest *rdata = container_of(subreq, struct cifs_io_subrequest, subreq); struct cifs_io_request *req = container_of(subreq->rreq, struct cifs_io_request, rreq); - struct TCP_Server_Info *server = req->server; + struct TCP_Server_Info *server = rdata->server; int rc = 0; cifs_dbg(FYI, "%s: op=%08x[%x] mapping=%p len=%zu/%zu\n", @@ -266,7 +268,6 @@ static int cifs_init_request(struct netfs_io_request *rreq, struct file *file) open_file = file->private_data; rreq->netfs_priv = file->private_data; req->cfile = cifsFileInfo_get(open_file); - req->server = cifs_pick_channel(tlink_tcon(req->cfile->tlink)->ses); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) req->pid = req->cfile->pid; } else if (rreq->origin != NETFS_WRITEBACK) { diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 9cc31cf6ebd0..616149c7f0a5 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1215,6 +1215,19 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, rc = server->ops->parse_reparse_point(cifs_sb, full_path, iov, data); + /* + * If the reparse point was not handled but it is the + * name surrogate which points to directory, then treat + * is as a new mount point. Name surrogate reparse point + * represents another named entity in the system. + */ + if (rc == -EOPNOTSUPP && + IS_REPARSE_TAG_NAME_SURROGATE(data->reparse.tag) && + (le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY)) { + rc = 0; + cifs_create_junction_fattr(fattr, sb); + goto out; + } } if (data->reparse.tag == IO_REPARSE_TAG_SYMLINK && !rc) { @@ -1408,7 +1421,7 @@ int cifs_get_inode_info(struct inode **inode, struct cifs_fattr fattr = {}; int rc; - if (is_inode_cache_good(*inode)) { + if (!data && is_inode_cache_good(*inode)) { cifs_dbg(FYI, "No need to revalidate cached inode sizes\n"); return 0; } @@ -1507,7 +1520,7 @@ int smb311_posix_get_inode_info(struct inode **inode, struct cifs_fattr fattr = {}; int rc; - if (is_inode_cache_good(*inode)) { + if (!data && is_inode_cache_good(*inode)) { cifs_dbg(FYI, "No need to revalidate cached inode sizes\n"); return 0; } diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index 0a5a52a8a7dd..2b9e9885dc42 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -1088,13 +1088,12 @@ int parse_reparse_point(struct reparse_data_buffer *buf, le32_to_cpu(buf->ReparseTag)); return -EIO; } - break; + return 0; default: cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n", le32_to_cpu(buf->ReparseTag)); - break; + return -EOPNOTSUPP; } - return 0; } int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index 5a753fec7e2c..c0be5ab45a78 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -99,14 +99,30 @@ static inline bool reparse_inode_match(struct inode *inode, static inline bool cifs_open_data_reparse(struct cifs_open_info_data *data) { - struct smb2_file_all_info *fi = &data->fi; - u32 attrs = le32_to_cpu(fi->Attributes); + u32 attrs; bool ret; - ret = data->reparse_point || (attrs & ATTR_REPARSE); - if (ret) - attrs |= ATTR_REPARSE; - fi->Attributes = cpu_to_le32(attrs); + if (data->contains_posix_file_info) { + struct smb311_posix_qinfo *fi = &data->posix_fi; + + attrs = le32_to_cpu(fi->DosAttributes); + if (data->reparse_point) { + attrs |= ATTR_REPARSE; + fi->DosAttributes = cpu_to_le32(attrs); + } + + } else { + struct smb2_file_all_info *fi = &data->fi; + + attrs = le32_to_cpu(fi->Attributes); + if (data->reparse_point) { + attrs |= ATTR_REPARSE; + fi->Attributes = cpu_to_le32(attrs); + } + } + + ret = attrs & ATTR_REPARSE; + return ret; } diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index 9756b876a75e..d6e2fb669c40 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -377,7 +377,7 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr) static void cifs_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { cifs_set_oplock_level(cinode, oplock); } diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 5dfb30b0a852..826b57a5a2a8 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -650,6 +650,7 @@ finished: switch (cmds[i]) { case SMB2_OP_QUERY_INFO: idata = in_iov[i].iov_base; + idata->contains_posix_file_info = false; if (rc == 0 && cfile && cfile->symlink_target) { idata->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL); if (!idata->symlink_target) @@ -673,6 +674,7 @@ finished: break; case SMB2_OP_POSIX_QUERY_INFO: idata = in_iov[i].iov_base; + idata->contains_posix_file_info = true; if (rc == 0 && cfile && cfile->symlink_target) { idata->symlink_target = kstrdup(cfile->symlink_target, GFP_KERNEL); if (!idata->symlink_target) @@ -770,6 +772,7 @@ finished: idata = in_iov[i].iov_base; idata->reparse.io.iov = *iov; idata->reparse.io.buftype = resp_buftype[i + 1]; + idata->contains_posix_file_info = false; /* BB VERIFY */ rbuf = reparse_buf_ptr(iov); if (IS_ERR(rbuf)) { rc = PTR_ERR(rbuf); @@ -791,6 +794,7 @@ finished: case SMB2_OP_QUERY_WSL_EA: if (!rc) { idata = in_iov[i].iov_base; + idata->contains_posix_file_info = false; qi_rsp = rsp_iov[i + 1].iov_base; data[0] = (u8 *)qi_rsp + le16_to_cpu(qi_rsp->OutputBufferOffset); size[0] = le32_to_cpu(qi_rsp->OutputBufferLength); diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 77309217dab4..4dd11eafb69d 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -1001,6 +1001,7 @@ static int smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon, if (!data->symlink_target) return -ENOMEM; } + data->contains_posix_file_info = false; return SMB2_query_info(xid, tcon, fid->persistent_fid, fid->volatile_fid, &data->fi); } @@ -3904,22 +3905,22 @@ static long smb3_fallocate(struct file *file, struct cifs_tcon *tcon, int mode, static void smb2_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { server->ops->set_oplock_level(cinode, oplock, 0, NULL); } static void smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache); + __u16 epoch, bool *purge_cache); static void smb3_downgrade_oplock(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { unsigned int old_state = cinode->oplock; - unsigned int old_epoch = cinode->epoch; + __u16 old_epoch = cinode->epoch; unsigned int new_state; if (epoch > old_epoch) { @@ -3939,7 +3940,7 @@ smb3_downgrade_oplock(struct TCP_Server_Info *server, static void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { oplock &= 0xFF; cinode->lease_granted = false; @@ -3963,7 +3964,7 @@ smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, static void smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { char message[5] = {0}; unsigned int new_oplock = 0; @@ -4000,7 +4001,7 @@ smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, static void smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock, - unsigned int epoch, bool *purge_cache) + __u16 epoch, bool *purge_cache) { unsigned int old_oplock = cinode->oplock; @@ -4114,7 +4115,7 @@ smb3_create_lease_buf(u8 *lease_key, u8 oplock) } static __u8 -smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) +smb2_parse_lease_buf(void *buf, __u16 *epoch, char *lease_key) { struct create_lease *lc = (struct create_lease *)buf; @@ -4125,7 +4126,7 @@ smb2_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) } static __u8 -smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key) +smb3_parse_lease_buf(void *buf, __u16 *epoch, char *lease_key) { struct create_lease_v2 *lc = (struct create_lease_v2 *)buf; @@ -4964,6 +4965,10 @@ one_more: next_buffer = (char *)cifs_buf_get(); else next_buffer = (char *)cifs_small_buf_get(); + if (!next_buffer) { + cifs_server_dbg(VFS, "No memory for (large) SMB response\n"); + return -1; + } memcpy(next_buffer, buf + next_cmd, pdu_length - next_cmd); } @@ -5146,7 +5151,7 @@ int __cifs_sfu_make_node(unsigned int xid, struct inode *inode, FILE_CREATE, CREATE_NOT_DIR | CREATE_OPTION_SPECIAL, ACL_NO_MODE); oparms.fid = &fid; - + idata.contains_posix_file_info = false; rc = server->ops->open(xid, &oparms, &oplock, &idata); if (rc) goto out; diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 40ad9e79437a..ed7812247ebc 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -2169,7 +2169,7 @@ tcon_exit: tcon_error_exit: if (rsp && rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) - cifs_tcon_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); + cifs_dbg(VFS | ONCE, "BAD_NETWORK_NAME: %s\n", tree); goto tcon_exit; } @@ -2329,7 +2329,7 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info, int smb2_parse_contexts(struct TCP_Server_Info *server, struct kvec *rsp_iov, - unsigned int *epoch, + __u16 *epoch, char *lease_key, __u8 *oplock, struct smb2_file_all_info *buf, struct create_posix_rsp *posix) diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h index 2336dfb23f36..4662c7e2d259 100644 --- a/fs/smb/client/smb2proto.h +++ b/fs/smb/client/smb2proto.h @@ -283,7 +283,7 @@ extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *, enum securityEnum); int smb2_parse_contexts(struct TCP_Server_Info *server, struct kvec *rsp_iov, - unsigned int *epoch, + __u16 *epoch, char *lease_key, __u8 *oplock, struct smb2_file_all_info *buf, struct create_posix_rsp *posix); diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index 3336df2ea5d4..c7a0efda4403 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1707,23 +1707,33 @@ struct smb2_file_internal_info { } __packed; /* level 6 Query */ struct smb2_file_rename_info { /* encoding of request for level 10 */ - __u8 ReplaceIfExists; /* 1 = replace existing target with new */ - /* 0 = fail if target already exists */ - __u8 Reserved[7]; - __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ - __le32 FileNameLength; + /* New members MUST be added within the struct_group() macro below. */ + __struct_group(smb2_file_rename_info_hdr, __hdr, __packed, + __u8 ReplaceIfExists; /* 1 = replace existing target with new */ + /* 0 = fail if target already exists */ + __u8 Reserved[7]; + __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ + __le32 FileNameLength; + ); char FileName[]; /* New name to be assigned */ /* padding - overall struct size must be >= 24 so filename + pad >= 6 */ } __packed; /* level 10 Set */ +static_assert(offsetof(struct smb2_file_rename_info, FileName) == sizeof(struct smb2_file_rename_info_hdr), + "struct member likely outside of __struct_group()"); struct smb2_file_link_info { /* encoding of request for level 11 */ - __u8 ReplaceIfExists; /* 1 = replace existing link with new */ - /* 0 = fail if link already exists */ - __u8 Reserved[7]; - __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ - __le32 FileNameLength; + /* New members MUST be added within the struct_group() macro below. */ + __struct_group(smb2_file_link_info_hdr, __hdr, __packed, + __u8 ReplaceIfExists; /* 1 = replace existing link with new */ + /* 0 = fail if link already exists */ + __u8 Reserved[7]; + __u64 RootDirectory; /* MBZ for network operations (why says spec?) */ + __le32 FileNameLength; + ); char FileName[]; /* Name to be assigned to new link */ } __packed; /* level 11 Set */ +static_assert(offsetof(struct smb2_file_link_info, FileName) == sizeof(struct smb2_file_link_info_hdr), + "struct member likely outside of __struct_group()"); /* * This level 18, although with struct with same name is different from cifs diff --git a/fs/smb/common/smbfsctl.h b/fs/smb/common/smbfsctl.h index 4b379e84c46b..3253a18ecb5c 100644 --- a/fs/smb/common/smbfsctl.h +++ b/fs/smb/common/smbfsctl.h @@ -159,6 +159,9 @@ #define IO_REPARSE_TAG_LX_CHR 0x80000025 #define IO_REPARSE_TAG_LX_BLK 0x80000026 +/* If Name Surrogate Bit is set, the file or directory represents another named entity in the system. */ +#define IS_REPARSE_TAG_NAME_SURROGATE(tag) (!!((tag) & 0x20000000)) + /* fsctl flags */ /* If Flags is set to this value, the request is an FSCTL not ioctl request */ #define SMB2_0_IOCTL_IS_FSCTL 0x00000001 diff --git a/fs/stat.c b/fs/stat.c index 2c0e111a098a..f13308bfdc98 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -281,6 +281,8 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat, u32 request_mask) { int error = vfs_getattr(path, stat, request_mask, flags); + if (error) + return error; if (request_mask & STATX_MNT_ID_UNIQUE) { stat->mnt_id = real_mount(path->mnt)->mnt_id_unique; @@ -302,7 +304,7 @@ static int vfs_statx_path(struct path *path, int flags, struct kstat *stat, if (S_ISBLK(stat->mode)) bdev_statx(path, stat, request_mask); - return error; + return 0; } static int vfs_statx_fd(int fd, int flags, struct kstat *stat, diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index e95b8a48d8a0..1d94bb784108 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -21,7 +21,8 @@ #define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ -static const unsigned char VBSF_MOUNT_SIGNATURE[4] = "\000\377\376\375"; +static const unsigned char VBSF_MOUNT_SIGNATURE[4] = { '\000', '\377', '\376', + '\375' }; static int follow_symlinks; module_param(follow_symlinks, int, 0444); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 40ad22fb808b..0ef19f1469ec 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3563,12 +3563,12 @@ xfs_bmap_btalloc_at_eof( int error; /* - * If there are already extents in the file, try an exact EOF block - * allocation to extend the file as a contiguous extent. If that fails, - * or it's the first allocation in a file, just try for a stripe aligned - * allocation. + * If there are already extents in the file, and xfs_bmap_adjacent() has + * given a better blkno, try an exact EOF block allocation to extend the + * file as a contiguous extent. If that fails, or it's the first + * allocation in a file, just try for a stripe aligned allocation. */ - if (ap->offset) { + if (ap->eof) { xfs_extlen_t nextminlen = 0; /* @@ -3736,7 +3736,8 @@ xfs_bmap_btalloc_best_length( int error; ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino); - xfs_bmap_adjacent(ap); + if (!xfs_bmap_adjacent(ap)) + ap->eof = false; /* * Search for an allocation group with a single extent large enough for diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index bdcd40f0ec74..19877d99f255 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -224,7 +224,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm) bool xchk_dir_looks_zapped(struct xfs_inode *dp); bool xchk_pptr_looks_zapped(struct xfs_inode *ip); -#ifdef CONFIG_XFS_ONLINE_REPAIR /* Decide if a repair is required. */ static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm) { @@ -244,10 +243,6 @@ static inline bool xchk_could_repair(const struct xfs_scrub *sc) return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) && !(sc->flags & XREP_ALREADY_FIXED); } -#else -# define xchk_needs_repair(sc) (false) -# define xchk_could_repair(sc) (false) -#endif /* CONFIG_XFS_ONLINE_REPAIR */ int xchk_metadata_inode_forks(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 2f641b6d663e..13ff1c933cb8 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -1055,9 +1055,17 @@ xrep_dinode_check_dfork( return true; break; case S_IFREG: - if (fmt == XFS_DINODE_FMT_LOCAL) + switch (fmt) { + case XFS_DINODE_FMT_LOCAL: return true; - fallthrough; + case XFS_DINODE_FMT_EXTENTS: + case XFS_DINODE_FMT_BTREE: + case XFS_DINODE_FMT_META_BTREE: + break; + default: + return true; + } + break; case S_IFLNK: case S_IFDIR: switch (fmt) { diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 823c00d1a502..af0a3a9e5ed9 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -191,7 +191,16 @@ int xrep_reset_metafile_resv(struct xfs_scrub *sc); #else #define xrep_ino_dqattach(sc) (0) -#define xrep_will_attempt(sc) (false) + +/* + * When online repair is not built into the kernel, we still want to attempt + * the repair so that the stub xrep_attempt below will return EOPNOTSUPP. + */ +static inline bool xrep_will_attempt(const struct xfs_scrub *sc) +{ + return (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) || + xchk_needs_repair(sc->sm); +} static inline int xrep_attempt( diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 7567dd5cad14..6fa9e3e5bab7 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -149,6 +149,18 @@ xchk_probe( if (xchk_should_terminate(sc, &error)) return error; + /* + * If the caller is probing to see if repair works but repair isn't + * built into the kernel, return EOPNOTSUPP because that's the signal + * that userspace expects. If online repair is built in, set the + * CORRUPT flag (without any of the usual tracing/logging) to force us + * into xrep_probe. + */ + if (xchk_could_repair(sc)) { + if (!IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)) + return -EOPNOTSUPP; + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + } return 0; } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 67877c36ed11..6d9965b546cb 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -19,6 +19,7 @@ #include "xfs_reflink.h" #include "xfs_errortag.h" #include "xfs_error.h" +#include "xfs_icache.h" struct xfs_writepage_ctx { struct iomap_writepage_ctx ctx; @@ -528,12 +529,44 @@ xfs_vm_readahead( } static int -xfs_iomap_swapfile_activate( +xfs_vm_swap_activate( struct swap_info_struct *sis, struct file *swap_file, sector_t *span) { - sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; + struct xfs_inode *ip = XFS_I(file_inode(swap_file)); + + /* + * Swap file activation can race against concurrent shared extent + * removal in files that have been cloned. If this happens, + * iomap_swapfile_iter() can fail because it encountered a shared + * extent even though an operation is in progress to remove those + * shared extents. + * + * This race becomes problematic when we defer extent removal + * operations beyond the end of a syscall (i.e. use async background + * processing algorithms). Users think the extents are no longer + * shared, but iomap_swapfile_iter() still sees them as shared + * because the refcountbt entries for the extents being removed have + * not yet been updated. Hence the swapon call fails unexpectedly. + * + * The race condition is currently most obvious from the unlink() + * operation as extent removal is deferred until after the last + * reference to the inode goes away. We then process the extent + * removal asynchronously, hence triggers the "syscall completed but + * work not done" condition mentioned above. To close this race + * window, we need to flush any pending inodegc operations to ensure + * they have updated the refcountbt records before we try to map the + * swapfile. + */ + xfs_inodegc_flush(ip->i_mount); + + /* + * Direct the swap code to the correct block device when this file + * sits on the RT device. + */ + sis->bdev = xfs_inode_buftarg(ip)->bt_bdev; + return iomap_swapfile_activate(sis, swap_file, span, &xfs_read_iomap_ops); } @@ -549,11 +582,11 @@ const struct address_space_operations xfs_address_space_operations = { .migrate_folio = filemap_migrate_folio, .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_folio = generic_error_remove_folio, - .swap_activate = xfs_iomap_swapfile_activate, + .swap_activate = xfs_vm_swap_activate, }; const struct address_space_operations xfs_dax_aops = { .writepages = xfs_dax_writepages, .dirty_folio = noop_dirty_folio, - .swap_activate = xfs_iomap_swapfile_activate, + .swap_activate = xfs_vm_swap_activate, }; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d1d4a0a22e13..15bb790359f8 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache; * * xfs_buf_rele: * b_lock - * pag_buf_lock - * lru_lock + * lru_lock * * xfs_buftarg_drain_rele * lru_lock @@ -220,23 +219,25 @@ _xfs_buf_alloc( */ flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD); - spin_lock_init(&bp->b_lock); + /* + * A new buffer is held and locked by the owner. This ensures that the + * buffer is owned by the caller and racing RCU lookups right after + * inserting into the hash table are safe (and will have to wait for + * the unlock to do anything non-trivial). + */ bp->b_hold = 1; + sema_init(&bp->b_sema, 0); /* held, no waiters */ + + spin_lock_init(&bp->b_lock); atomic_set(&bp->b_lru_ref, 1); init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_lru); INIT_LIST_HEAD(&bp->b_list); INIT_LIST_HEAD(&bp->b_li_list); - sema_init(&bp->b_sema, 0); /* held, no waiters */ bp->b_target = target; bp->b_mount = target->bt_mount; bp->b_flags = flags; - /* - * Set length and io_length to the same value initially. - * I/O routines should use io_length, which will be the same in - * most cases but may be reset (e.g. XFS recovery). - */ error = xfs_buf_get_maps(bp, nmaps); if (error) { kmem_cache_free(xfs_buf_cache, bp); @@ -502,7 +503,6 @@ int xfs_buf_cache_init( struct xfs_buf_cache *bch) { - spin_lock_init(&bch->bc_lock); return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params); } @@ -652,17 +652,20 @@ xfs_buf_find_insert( if (error) goto out_free_buf; - spin_lock(&bch->bc_lock); + /* The new buffer keeps the perag reference until it is freed. */ + new_bp->b_pag = pag; + + rcu_read_lock(); bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash, &new_bp->b_rhash_head, xfs_buf_hash_params); if (IS_ERR(bp)) { + rcu_read_unlock(); error = PTR_ERR(bp); - spin_unlock(&bch->bc_lock); goto out_free_buf; } if (bp && xfs_buf_try_hold(bp)) { /* found an existing buffer */ - spin_unlock(&bch->bc_lock); + rcu_read_unlock(); error = xfs_buf_find_lock(bp, flags); if (error) xfs_buf_rele(bp); @@ -670,10 +673,8 @@ xfs_buf_find_insert( *bpp = bp; goto out_free_buf; } + rcu_read_unlock(); - /* The new buffer keeps the perag reference until it is freed. */ - new_bp->b_pag = pag; - spin_unlock(&bch->bc_lock); *bpp = new_bp; return 0; @@ -1090,7 +1091,6 @@ xfs_buf_rele_cached( } /* we are asked to drop the last reference */ - spin_lock(&bch->bc_lock); __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* @@ -1102,7 +1102,6 @@ xfs_buf_rele_cached( bp->b_state &= ~XFS_BSTATE_DISPOSE; else bp->b_hold--; - spin_unlock(&bch->bc_lock); } else { bp->b_hold--; /* @@ -1120,7 +1119,6 @@ xfs_buf_rele_cached( ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head, xfs_buf_hash_params); - spin_unlock(&bch->bc_lock); if (pag) xfs_perag_put(pag); freebuf = true; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7e73663c5d4a..3b4ed42e11c0 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t; #define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ struct xfs_buf_cache { - spinlock_t bc_lock; struct rhashtable bc_hash; }; diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index f340a2015c4c..0b41bdfecdfb 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -329,22 +329,6 @@ out_trans_cancel: * successfully but before locks are dropped. */ -/* Verify that we have security clearance to perform this operation. */ -static int -xfs_exchange_range_verify_area( - struct xfs_exchrange *fxr) -{ - int ret; - - ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, - true); - if (ret) - return ret; - - return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, - true); -} - /* * Performs necessary checks before doing a range exchange, having stabilized * mutable inode attributes via i_rwsem. @@ -355,11 +339,13 @@ xfs_exchange_range_checks( unsigned int alloc_unit) { struct inode *inode1 = file_inode(fxr->file1); + loff_t size1 = i_size_read(inode1); struct inode *inode2 = file_inode(fxr->file2); + loff_t size2 = i_size_read(inode2); uint64_t allocmask = alloc_unit - 1; int64_t test_len; uint64_t blen; - loff_t size1, size2, tmp; + loff_t tmp; int error; /* Don't touch certain kinds of inodes */ @@ -368,24 +354,25 @@ xfs_exchange_range_checks( if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) return -ETXTBSY; - size1 = i_size_read(inode1); - size2 = i_size_read(inode2); - /* Ranges cannot start after EOF. */ if (fxr->file1_offset > size1 || fxr->file2_offset > size2) return -EINVAL; - /* - * If the caller said to exchange to EOF, we set the length of the - * request large enough to cover everything to the end of both files. - */ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { + /* + * If the caller said to exchange to EOF, we set the length of + * the request large enough to cover everything to the end of + * both files. + */ fxr->length = max_t(int64_t, size1 - fxr->file1_offset, size2 - fxr->file2_offset); - - error = xfs_exchange_range_verify_area(fxr); - if (error) - return error; + } else { + /* + * Otherwise we require both ranges to end within EOF. + */ + if (fxr->file1_offset + fxr->length > size1 || + fxr->file2_offset + fxr->length > size2) + return -EINVAL; } /* @@ -402,15 +389,6 @@ xfs_exchange_range_checks( return -EINVAL; /* - * We require both ranges to end within EOF, unless we're exchanging - * to EOF. - */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && - (fxr->file1_offset + fxr->length > size1 || - fxr->file2_offset + fxr->length > size2)) - return -EINVAL; - - /* * Make sure we don't hit any file size limits. If we hit any size * limits such that test_length was adjusted, we abort the whole * operation. @@ -747,6 +725,7 @@ xfs_exchange_range( { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); + loff_t check_len = fxr->length; int ret; BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & @@ -779,14 +758,18 @@ xfs_exchange_range( return -EBADF; /* - * If we're not exchanging to EOF, we can check the areas before - * stabilizing both files' i_size. + * If we're exchanging to EOF we can't calculate the length until taking + * the iolock. Pass a 0 length to remap_verify_area similar to the + * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well. */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { - ret = xfs_exchange_range_verify_area(fxr); - if (ret) - return ret; - } + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) + check_len = 0; + ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true); + if (ret) + return ret; + ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true); + if (ret) + return ret; /* Update cmtime if the fd/inode don't forbid it. */ if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c95fe1b1de4e..b1f9f156ec88 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1404,8 +1404,11 @@ xfs_inactive( goto out; /* Try to clean out the cow blocks if there are any. */ - if (xfs_inode_has_cow_data(ip)) - xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (xfs_inode_has_cow_data(ip)) { + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (error) + goto out; + } if (VFS_I(ip)->i_nlink != 0) { /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 50fa3ef89f6c..d61460309a78 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -976,10 +976,8 @@ xfs_dax_write_iomap_end( if (!xfs_is_cow_inode(ip)) return 0; - if (!written) { - xfs_reflink_cancel_cow_range(ip, pos, length, true); - return 0; - } + if (!written) + return xfs_reflink_cancel_cow_range(ip, pos, length, true); return xfs_reflink_end_cow(ip, pos, written); } diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 37f1230e7584..245d754f382a 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -78,6 +78,28 @@ xfs_qm_statvfs( } } +STATIC int +xfs_qm_validate_state_change( + struct xfs_mount *mp, + uint uqd, + uint gqd, + uint pqd) +{ + int state; + + /* Is quota state changing? */ + state = ((uqd && !XFS_IS_UQUOTA_ON(mp)) || + (!uqd && XFS_IS_UQUOTA_ON(mp)) || + (gqd && !XFS_IS_GQUOTA_ON(mp)) || + (!gqd && XFS_IS_GQUOTA_ON(mp)) || + (pqd && !XFS_IS_PQUOTA_ON(mp)) || + (!pqd && XFS_IS_PQUOTA_ON(mp))); + + return state && + (xfs_dev_is_read_only(mp, "changing quota state") || + xfs_has_norecovery(mp)); +} + int xfs_qm_newmount( xfs_mount_t *mp, @@ -97,24 +119,25 @@ xfs_qm_newmount( } /* - * If the device itself is read-only, we can't allow - * the user to change the state of quota on the mount - - * this would generate a transaction on the ro device, - * which would lead to an I/O error and shutdown + * If the device itself is read-only and/or in norecovery + * mode, we can't allow the user to change the state of + * quota on the mount - this would generate a transaction + * on the ro device, which would lead to an I/O error and + * shutdown. */ - if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || - (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || - (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || - (!gquotaondisk && XFS_IS_GQUOTA_ON(mp)) || - (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || - (!pquotaondisk && XFS_IS_PQUOTA_ON(mp))) && - xfs_dev_is_read_only(mp, "changing quota state")) { - xfs_warn(mp, "please mount with%s%s%s%s.", - (!quotaondisk ? "out quota" : ""), - (uquotaondisk ? " usrquota" : ""), - (gquotaondisk ? " grpquota" : ""), - (pquotaondisk ? " prjquota" : "")); + if (xfs_qm_validate_state_change(mp, uquotaondisk, + gquotaondisk, pquotaondisk)) { + + if (xfs_has_metadir(mp)) + xfs_warn(mp, + "metadir enabled, please mount without any quota mount options"); + else + xfs_warn(mp, "please mount with%s%s%s%s.", + (!quotaondisk ? "out quota" : ""), + (uquotaondisk ? " usrquota" : ""), + (gquotaondisk ? " grpquota" : ""), + (pquotaondisk ? " prjquota" : "")); return -EPERM; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d92d7a07ea89..0055066fb1d9 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1661,8 +1661,12 @@ xfs_fs_fill_super( #endif } - /* Filesystem claims it needs repair, so refuse the mount. */ - if (xfs_has_needsrepair(mp)) { + /* + * Filesystem claims it needs repair, so refuse the mount unless + * norecovery is also specified, in which case the filesystem can + * be mounted with no risk of further damage. + */ + if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) { xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); error = -EFSCORRUPTED; goto out_free_sb; diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index f42133dae68e..2afc95bf1655 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -90,7 +90,7 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, #ifndef __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) + unsigned long addr, pte_t *ptep, unsigned long sz) { return ptep_get_and_clear(mm, addr, ptep); } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 54504013c749..0d5b186abee8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -457,7 +457,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) . = ALIGN((align)); \ .rodata : AT(ADDR(.rodata) - LOAD_OFFSET) { \ __start_rodata = .; \ - *(.rodata) *(.rodata.*) \ + *(.rodata) *(.rodata.*) *(.data.rel.ro*) \ SCHED_DATA \ RO_AFTER_INIT_DATA /* Read only after init */ \ . = ALIGN(8); \ @@ -1038,6 +1038,7 @@ defined(CONFIG_AUTOFDO_CLANG) || defined(CONFIG_PROPELLER_CLANG) *(.discard) \ *(.discard.*) \ *(.export_symbol) \ + *(.no_trim_symbol) \ *(.modinfo) \ /* ld.bfd warns about .gnu.version* even when not emitted */ \ *(.gnu.version*) \ diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index a6f8b098c56f..3bd9f482f0c3 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -359,6 +359,7 @@ # define DP_DSC_BITS_PER_PIXEL_1_4 0x2 # define DP_DSC_BITS_PER_PIXEL_1_2 0x3 # define DP_DSC_BITS_PER_PIXEL_1_1 0x4 +# define DP_DSC_BITS_PER_PIXEL_MASK 0x7 #define DP_PSR_SUPPORT 0x070 /* XXX 1.2? */ # define DP_PSR_IS_SUPPORTED 1 diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index f77fe1531cf8..9732f514566d 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -32,6 +32,7 @@ #include <linux/dynamic_debug.h> #include <drm/drm.h> +#include <drm/drm_device.h> struct debugfs_regset32; struct drm_device; diff --git a/include/dt-bindings/clock/qcom,qcs8300-camcc.h b/include/dt-bindings/clock/qcom,qcs8300-camcc.h new file mode 100644 index 000000000000..fc535c847859 --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcs8300-camcc.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_QCS8300_CAM_CC_H +#define _DT_BINDINGS_CLK_QCOM_QCS8300_CAM_CC_H + +#include "qcom,sa8775p-camcc.h" + +/* QCS8300 introduces below new clocks compared to SA8775P */ + +/* CAM_CC clocks */ +#define CAM_CC_TITAN_TOP_ACCU_SHIFT_CLK 86 + +#endif diff --git a/include/dt-bindings/clock/qcom,qcs8300-gpucc.h b/include/dt-bindings/clock/qcom,qcs8300-gpucc.h new file mode 100644 index 000000000000..afa187467b4c --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcs8300-gpucc.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPUCC_QCS8300_H +#define _DT_BINDINGS_CLK_QCOM_GPUCC_QCS8300_H + +#include "qcom,sa8775p-gpucc.h" + +/* QCS8300 introduces below new clocks compared to SA8775P */ + +/* GPU_CC clocks */ +#define GPU_CC_CX_ACCU_SHIFT_CLK 23 +#define GPU_CC_GX_ACCU_SHIFT_CLK 24 + +#endif diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9ebb53f031cd..fa2a76cc2f73 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -861,12 +861,22 @@ static inline bool blk_mq_add_to_batch(struct request *req, void (*complete)(struct io_comp_batch *)) { /* - * blk_mq_end_request_batch() can't end request allocated from - * sched tags + * Check various conditions that exclude batch processing: + * 1) No batch container + * 2) Has scheduler data attached + * 3) Not a passthrough request and end_io set + * 4) Not a passthrough request and an ioerror */ - if (!iob || (req->rq_flags & RQF_SCHED_TAGS) || ioerror || - (req->end_io && !blk_rq_is_passthrough(req))) + if (!iob) return false; + if (req->rq_flags & RQF_SCHED_TAGS) + return false; + if (!blk_rq_is_passthrough(req)) { + if (req->end_io) + return false; + if (ioerror < 0) + return false; + } if (!iob->complete) iob->complete = complete; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 248416ecd01c..d37751789bf5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,10 +196,11 @@ struct gendisk { unsigned int zone_capacity; unsigned int last_zone_capacity; unsigned long __rcu *conv_zones_bitmap; - unsigned int zone_wplugs_hash_bits; - spinlock_t zone_wplugs_lock; + unsigned int zone_wplugs_hash_bits; + atomic_t nr_zone_wplugs; + spinlock_t zone_wplugs_lock; struct mempool_s *zone_wplugs_pool; - struct hlist_head *zone_wplugs_hash; + struct hlist_head *zone_wplugs_hash; struct workqueue_struct *zone_wplugs_wq; #endif /* CONFIG_BLK_DEV_ZONED */ @@ -367,6 +368,7 @@ struct queue_limits { unsigned int max_sectors; unsigned int max_user_sectors; unsigned int max_segment_size; + unsigned int min_segment_size; unsigned int physical_block_size; unsigned int logical_block_size; unsigned int alignment_offset; diff --git a/include/linux/call_once.h b/include/linux/call_once.h index 6261aa0b3fb0..13cd6469e7e5 100644 --- a/include/linux/call_once.h +++ b/include/linux/call_once.h @@ -26,20 +26,41 @@ do { \ __once_init((once), #once, &__key); \ } while (0) -static inline void call_once(struct once *once, void (*cb)(struct once *)) +/* + * call_once - Ensure a function has been called exactly once + * + * @once: Tracking struct + * @cb: Function to be called + * + * If @once has never completed successfully before, call @cb and, if + * it returns a zero or positive value, mark @once as completed. Return + * the value returned by @cb + * + * If @once has completed succesfully before, return 0. + * + * The call to @cb is implicitly surrounded by a mutex, though for + * efficiency the * function avoids taking it after the first call. + */ +static inline int call_once(struct once *once, int (*cb)(struct once *)) { - /* Pairs with atomic_set_release() below. */ - if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) - return; - - guard(mutex)(&once->lock); - WARN_ON(atomic_read(&once->state) == ONCE_RUNNING); - if (atomic_read(&once->state) != ONCE_NOT_STARTED) - return; - - atomic_set(&once->state, ONCE_RUNNING); - cb(once); - atomic_set_release(&once->state, ONCE_COMPLETED); + int r, state; + + /* Pairs with atomic_set_release() below. */ + if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) + return 0; + + guard(mutex)(&once->lock); + state = atomic_read(&once->state); + if (unlikely(state != ONCE_NOT_STARTED)) + return WARN_ON_ONCE(state != ONCE_COMPLETED) ? -EINVAL : 0; + + atomic_set(&once->state, ONCE_RUNNING); + r = cb(once); + if (r < 0) + atomic_set(&once->state, ONCE_NOT_STARTED); + else + atomic_set_release(&once->state, ONCE_COMPLETED); + return r; } #endif /* _LINUX_CALL_ONCE_H */ diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 1b20d2d8ef7c..17960a1e858d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -71,9 +71,6 @@ enum { /* Cgroup is frozen. */ CGRP_FROZEN, - - /* Control group has to be killed. */ - CGRP_KILL, }; /* cgroup_root->flags */ @@ -461,6 +458,9 @@ struct cgroup { int nr_threaded_children; /* # of live threaded child cgroups */ + /* sequence number for cgroup.kill, serialized by css_set_lock. */ + unsigned int kill_seq; + struct kernfs_node *kn; /* cgroup kernfs entry */ struct cgroup_file procs_file; /* handle for "cgroup.procs" */ struct cgroup_file events_file; /* handle for "cgroup.events" */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index b087de2f3e94..155385754824 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -110,7 +110,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Unreachable code */ #ifdef CONFIG_OBJTOOL /* Annotate a C jump table to allow objtool to follow the code flow */ -#define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #") +#define __annotate_jump_table __section(".data.rel.ro.c_jump_table") #else /* !CONFIG_OBJTOOL */ #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ @@ -191,6 +191,25 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, __v; \ }) +#ifdef __CHECKER__ +#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0) +#else /* __CHECKER__ */ +#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif /* __CHECKER__ */ + +/* &a[0] degrades to a pointer: a different type from an array */ +#define __is_array(a) (!__same_type((a), &(a)[0])) +#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_array(a), \ + "must be array") + +#define __is_byte_array(a) (__is_array(a) && sizeof((a)[0]) == 1) +#define __must_be_byte_array(a) __BUILD_BUG_ON_ZERO_MSG(!__is_byte_array(a), \ + "must be byte array") + +/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ +#define __must_be_cstr(p) \ + __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") + #endif /* __KERNEL__ */ /** @@ -231,19 +250,6 @@ static inline void *offset_to_ptr(const int *off) #define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym)) -#ifdef __CHECKER__ -#define __BUILD_BUG_ON_ZERO_MSG(e, msg) (0) -#else /* __CHECKER__ */ -#define __BUILD_BUG_ON_ZERO_MSG(e, msg) ((int)sizeof(struct {_Static_assert(!(e), msg);})) -#endif /* __CHECKER__ */ - -/* &a[0] degrades to a pointer: a different type from an array */ -#define __must_be_array(a) __BUILD_BUG_ON_ZERO_MSG(__same_type((a), &(a)[0]), "must be array") - -/* Require C Strings (i.e. NUL-terminated) lack the "nonstring" attribute. */ -#define __must_be_cstr(p) \ - __BUILD_BUG_ON_ZERO_MSG(__annotated(p, nonstring), "must be cstr (NUL-terminated)") - /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. diff --git a/include/linux/device/faux.h b/include/linux/device/faux.h new file mode 100644 index 000000000000..9f43c0e46aa4 --- /dev/null +++ b/include/linux/device/faux.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2025 Greg Kroah-Hartman <gregkh@linuxfoundation.org> + * Copyright (c) 2025 The Linux Foundation + * + * A "simple" faux bus that allows devices to be created and added + * automatically to it. This is to be used whenever you need to create a + * device that is not associated with any "real" system resources, and do + * not want to have to deal with a bus/driver binding logic. It is + * intended to be very simple, with only a create and a destroy function + * available. + */ +#ifndef _FAUX_DEVICE_H_ +#define _FAUX_DEVICE_H_ + +#include <linux/container_of.h> +#include <linux/device.h> + +/** + * struct faux_device - a "faux" device + * @dev: internal struct device of the object + * + * A simple faux device that can be created/destroyed. To be used when a + * driver only needs to have a device to "hang" something off. This can be + * used for downloading firmware or other basic tasks. Use this instead of + * a struct platform_device if the device has no resources assigned to + * it at all. + */ +struct faux_device { + struct device dev; +}; +#define to_faux_device(x) container_of_const((x), struct faux_device, dev) + +/** + * struct faux_device_ops - a set of callbacks for a struct faux_device + * @probe: called when a faux device is probed by the driver core + * before the device is fully bound to the internal faux bus + * code. If probe succeeds, return 0, otherwise return a + * negative error number to stop the probe sequence from + * succeeding. + * @remove: called when a faux device is removed from the system + * + * Both @probe and @remove are optional, if not needed, set to NULL. + */ +struct faux_device_ops { + int (*probe)(struct faux_device *faux_dev); + void (*remove)(struct faux_device *faux_dev); +}; + +struct faux_device *faux_device_create(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops); +struct faux_device *faux_device_create_with_groups(const char *name, + struct device *parent, + const struct faux_device_ops *faux_ops, + const struct attribute_group **groups); +void faux_device_destroy(struct faux_device *faux_dev); + +static inline void *faux_device_get_drvdata(const struct faux_device *faux_dev) +{ + return dev_get_drvdata(&faux_dev->dev); +} + +static inline void faux_device_set_drvdata(struct faux_device *faux_dev, void *data) +{ + dev_set_drvdata(&faux_dev->dev, data); +} + +#endif /* _FAUX_DEVICE_H_ */ diff --git a/include/linux/efi.h b/include/linux/efi.h index 053c57e61869..7d63d1d75f22 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -114,21 +114,22 @@ typedef struct { #define EFI_MAX_MEMORY_TYPE 16 /* Attribute values: */ -#define EFI_MEMORY_UC ((u64)0x0000000000000001ULL) /* uncached */ -#define EFI_MEMORY_WC ((u64)0x0000000000000002ULL) /* write-coalescing */ -#define EFI_MEMORY_WT ((u64)0x0000000000000004ULL) /* write-through */ -#define EFI_MEMORY_WB ((u64)0x0000000000000008ULL) /* write-back */ -#define EFI_MEMORY_UCE ((u64)0x0000000000000010ULL) /* uncached, exported */ -#define EFI_MEMORY_WP ((u64)0x0000000000001000ULL) /* write-protect */ -#define EFI_MEMORY_RP ((u64)0x0000000000002000ULL) /* read-protect */ -#define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ -#define EFI_MEMORY_NV ((u64)0x0000000000008000ULL) /* non-volatile */ -#define EFI_MEMORY_MORE_RELIABLE \ - ((u64)0x0000000000010000ULL) /* higher reliability */ -#define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ -#define EFI_MEMORY_SP ((u64)0x0000000000040000ULL) /* soft reserved */ -#define EFI_MEMORY_CPU_CRYPTO ((u64)0x0000000000080000ULL) /* supports encryption */ -#define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ +#define EFI_MEMORY_UC BIT_ULL(0) /* uncached */ +#define EFI_MEMORY_WC BIT_ULL(1) /* write-coalescing */ +#define EFI_MEMORY_WT BIT_ULL(2) /* write-through */ +#define EFI_MEMORY_WB BIT_ULL(3) /* write-back */ +#define EFI_MEMORY_UCE BIT_ULL(4) /* uncached, exported */ +#define EFI_MEMORY_WP BIT_ULL(12) /* write-protect */ +#define EFI_MEMORY_RP BIT_ULL(13) /* read-protect */ +#define EFI_MEMORY_XP BIT_ULL(14) /* execute-protect */ +#define EFI_MEMORY_NV BIT_ULL(15) /* non-volatile */ +#define EFI_MEMORY_MORE_RELIABLE BIT_ULL(16) /* higher reliability */ +#define EFI_MEMORY_RO BIT_ULL(17) /* read-only */ +#define EFI_MEMORY_SP BIT_ULL(18) /* soft reserved */ +#define EFI_MEMORY_CPU_CRYPTO BIT_ULL(19) /* supports encryption */ +#define EFI_MEMORY_HOT_PLUGGABLE BIT_ULL(20) /* supports unplugging at runtime */ +#define EFI_MEMORY_RUNTIME BIT_ULL(63) /* range requires runtime mapping */ + #define EFI_MEMORY_DESCRIPTOR_VERSION 1 #define EFI_PAGE_SHIFT 12 diff --git a/include/linux/fs.h b/include/linux/fs.h index be3ad155ec9f..2788df98080f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -222,7 +222,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_FSNOTIFY_HSM(mode) 0 #endif - /* * Attribute flags. These should be or-ed together to figure out what * has been changed! @@ -791,6 +790,19 @@ struct inode { static inline void inode_set_cached_link(struct inode *inode, char *link, int linklen) { + int testlen; + + /* + * TODO: patch it into a debug-only check if relevant macros show up. + * In the meantime, since we are suffering strlen even on production kernels + * to find the right length, do a fixup if the wrong value got passed. + */ + testlen = strlen(link); + if (testlen != linklen) { + WARN_ONCE(1, "bad length passed for symlink [%s] (got %d, expected %d)", + link, linklen, testlen); + linklen = testlen; + } inode->i_link = link; inode->i_linklen = linklen; inode->i_opflags |= IOP_CACHED_LINK; @@ -2963,8 +2975,8 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) } else if (iocb->ki_flags & IOCB_DONTCACHE) { struct address_space *mapping = iocb->ki_filp->f_mapping; - filemap_fdatawrite_range_kick(mapping, iocb->ki_pos, - iocb->ki_pos + count); + filemap_fdatawrite_range_kick(mapping, iocb->ki_pos - count, + iocb->ki_pos - 1); } return count; @@ -3140,6 +3152,12 @@ static inline void exe_file_allow_write_access(struct file *exe_file) allow_write_access(exe_file); } +static inline void file_set_fsnotify_mode(struct file *file, fmode_t mode) +{ + file->f_mode &= ~FMODE_FSNOTIFY_MASK; + file->f_mode |= mode; +} + static inline bool inode_is_open_for_write(const struct inode *inode) { return atomic_read(&inode->i_writecount) > 0; @@ -3434,6 +3452,8 @@ extern const struct file_operations generic_ro_fops; extern int readlink_copy(char __user *, int, const char *, int); extern int page_readlink(struct dentry *, char __user *, int); +extern const char *page_get_link_raw(struct dentry *, struct inode *, + struct delayed_call *); extern const char *page_get_link(struct dentry *, struct inode *, struct delayed_call *); extern void page_put_link(void *); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 1a9ef8f6784d..6a33288bd6a1 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -129,7 +129,7 @@ static inline int fsnotify_file(struct file *file, __u32 mask) #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS -void file_set_fsnotify_mode(struct file *file); +void file_set_fsnotify_mode_from_watchers(struct file *file); /* * fsnotify_file_area_perm - permission hook before access to file range @@ -213,7 +213,7 @@ static inline int fsnotify_open_perm(struct file *file) } #else -static inline void file_set_fsnotify_mode(struct file *file) +static inline void file_set_fsnotify_mode_from_watchers(struct file *file) { } diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index c3b4b7ed7c16..84a5045f80f3 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -125,6 +125,7 @@ struct hrtimer_cpu_base { ktime_t softirq_expires_next; struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + call_single_data_t csd; } ____cacheline_aligned; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ec8c0ccc8f95..bf5f7256bd28 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -1004,7 +1004,9 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm) static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); + unsigned long psize = huge_page_size(hstate_vma(vma)); + + return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize); } #endif diff --git a/include/linux/i2c.h b/include/linux/i2c.h index c31fd1dba3bd..2b2af24d2a43 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -244,6 +244,7 @@ enum i2c_driver_flags { * @id_table: List of I2C devices supported by this driver * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) + * @clients: List of detected clients we created (for i2c-core use only) * @flags: A bitmask of flags defined in &enum i2c_driver_flags * * The driver.owner field should be set to the module owner of this driver. @@ -298,6 +299,7 @@ struct i2c_driver { /* Device detection callback for automatic device creation */ int (*detect)(struct i2c_client *client, struct i2c_board_info *info); const unsigned short *address_list; + struct list_head clients; u32 flags; }; @@ -313,6 +315,8 @@ struct i2c_driver { * @dev: Driver model device node for the slave. * @init_irq: IRQ that was set at initialization * @irq: indicates the IRQ generated by this device (if any) + * @detected: member of an i2c_driver.clients list or i2c-core's + * userspace_devices list * @slave_cb: Callback when I2C slave mode of an adapter is used. The adapter * calls it to pass on slave events to the slave driver. * @devres_group_id: id of the devres group that will be created for resources @@ -332,8 +336,6 @@ struct i2c_client { #define I2C_CLIENT_SLAVE 0x20 /* we are the slave */ #define I2C_CLIENT_HOST_NOTIFY 0x40 /* We want to use I2C host notify */ #define I2C_CLIENT_WAKE 0x80 /* for board_info; true iff can wake */ -#define I2C_CLIENT_AUTO 0x100 /* client was auto-detected */ -#define I2C_CLIENT_USER 0x200 /* client was userspace-created */ #define I2C_CLIENT_SCCB 0x9000 /* Use Omnivision SCCB protocol */ /* Must match I2C_M_STOP|IGNORE_NAK */ @@ -345,6 +347,7 @@ struct i2c_client { struct device dev; /* the device structure */ int init_irq; /* irq set at initialization */ int irq; /* irq issued by device */ + struct list_head detected; #if IS_ENABLED(CONFIG_I2C_SLAVE) i2c_slave_cb_t slave_cb; /* callback for slave mode */ #endif @@ -751,6 +754,9 @@ struct i2c_adapter { char name[48]; struct completion dev_released; + struct mutex userspace_clients_lock; + struct list_head userspace_clients; + struct i2c_bus_recovery_info *bus_recovery_info; const struct i2c_adapter_quirks *quirks; diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index ed945f42e064..0ea8c9887429 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -537,7 +537,7 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) * * Return: jiffies value */ -#define secs_to_jiffies(_secs) ((_secs) * HZ) +#define secs_to_jiffies(_secs) (unsigned long)((_secs) * HZ) extern unsigned long __usecs_to_jiffies(const unsigned int u); #if !(USEC_PER_SEC % HZ) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3cb9a32a6330..f34f4cfaa513 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1615,7 +1615,6 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); -int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); void kvm_arch_create_vm_debugfs(struct kvm *kvm); diff --git a/include/linux/lockref.h b/include/linux/lockref.h index c39f119659ba..676721ee878d 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -37,12 +37,13 @@ struct lockref { /** * lockref_init - Initialize a lockref * @lockref: pointer to lockref structure - * @count: initial count + * + * Initializes @lockref->count to 1. */ -static inline void lockref_init(struct lockref *lockref, unsigned int count) +static inline void lockref_init(struct lockref *lockref) { spin_lock_init(&lockref->lock); - lockref->count = count; + lockref->count = 1; } void lockref_get(struct lockref *lockref); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6b27db7f9496..0234f14f2aa6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -875,10 +875,11 @@ struct mm_struct { */ unsigned int nr_cpus_allowed; /** - * @max_nr_cid: Maximum number of concurrency IDs allocated. + * @max_nr_cid: Maximum number of allowed concurrency + * IDs allocated. * - * Track the highest number of concurrency IDs allocated for the - * mm. + * Track the highest number of allowed concurrency IDs + * allocated for the mm. */ atomic_t max_nr_cid; /** diff --git a/include/linux/module.h b/include/linux/module.h index 23792d5d7b74..30e5b19bafa9 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -306,7 +306,10 @@ extern int modules_disabled; /* for sysctl */ /* Get/put a kernel symbol (calls must be symmetric) */ void *__symbol_get(const char *symbol); void *__symbol_get_gpl(const char *symbol); -#define symbol_get(x) ((typeof(&x))(__symbol_get(__stringify(x)))) +#define symbol_get(x) ({ \ + static const char __notrim[] \ + __used __section(".no_trim_symbol") = __stringify(x); \ + (typeof(&x))(__symbol_get(__stringify(x))); }) /* modules using other modules: kdb wants to see this. */ struct module_use { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2a59034a5fa2..ab550a89b9bf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2664,6 +2664,12 @@ struct net *dev_net(const struct net_device *dev) } static inline +struct net *dev_net_rcu(const struct net_device *dev) +{ + return read_pnet_rcu(&dev->nd_net); +} + +static inline void dev_net_set(struct net_device *dev, struct net *net) { write_pnet(&dev->nd_net, net); @@ -2904,9 +2910,9 @@ struct pcpu_sw_netstats { struct pcpu_dstats { u64_stats_t rx_packets; u64_stats_t rx_bytes; - u64_stats_t rx_drops; u64_stats_t tx_packets; u64_stats_t tx_bytes; + u64_stats_t rx_drops; u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); @@ -3269,6 +3275,8 @@ static inline struct net_device *first_net_device_rcu(struct net *net) } int netdev_boot_setup_check(struct net_device *dev); +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, + const char *hwaddr); struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); @@ -4109,7 +4117,6 @@ void netif_receive_skb_list(struct list_head *head); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); -void napi_get_frags_check(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 071d05d81d38..c86a11cfc4a3 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -278,7 +278,7 @@ struct netfs_io_request { #define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */ #define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */ #define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */ -#define NETFS_RREQ_NEED_RETRY 14 /* Need to try retrying */ +#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */ #define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark * write to cache on read */ const struct netfs_request_ops *netfs_ops; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 71fbebfa43c7..9ac83ca88326 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -47,6 +47,7 @@ struct nfs4_acl { struct nfs4_label { uint32_t lfs; uint32_t pi; + u32 lsmid; u32 len; char *label; }; diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index e07e8978d691..e435250fcb4d 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -13,6 +13,8 @@ #define NVME_TCP_ADMIN_CCSZ SZ_8K #define NVME_TCP_DIGEST_LENGTH 4 #define NVME_TCP_MIN_MAXH2CDATA 4096 +#define NVME_TCP_MIN_C2HTERM_PLEN 24 +#define NVME_TCP_MAX_C2HTERM_PLEN 152 enum nvme_tcp_pfv { NVME_TCP_PFV_1_0 = 0x0, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index fe3b60818fdc..2dc05b1c3283 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -199,28 +199,54 @@ enum { #define NVME_NVM_IOSQES 6 #define NVME_NVM_IOCQES 4 +/* + * Controller Configuration (CC) register (Offset 14h) + */ enum { + /* Enable (EN): bit 0 */ NVME_CC_ENABLE = 1 << 0, NVME_CC_EN_SHIFT = 0, + + /* Bits 03:01 are reserved (NVMe Base Specification rev 2.1) */ + + /* I/O Command Set Selected (CSS): bits 06:04 */ NVME_CC_CSS_SHIFT = 4, - NVME_CC_MPS_SHIFT = 7, - NVME_CC_AMS_SHIFT = 11, - NVME_CC_SHN_SHIFT = 14, - NVME_CC_IOSQES_SHIFT = 16, - NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT, NVME_CC_CSS_NVM = 0 << NVME_CC_CSS_SHIFT, NVME_CC_CSS_CSI = 6 << NVME_CC_CSS_SHIFT, - NVME_CC_CSS_MASK = 7 << NVME_CC_CSS_SHIFT, + + /* Memory Page Size (MPS): bits 10:07 */ + NVME_CC_MPS_SHIFT = 7, + NVME_CC_MPS_MASK = 0xf << NVME_CC_MPS_SHIFT, + + /* Arbitration Mechanism Selected (AMS): bits 13:11 */ + NVME_CC_AMS_SHIFT = 11, + NVME_CC_AMS_MASK = 7 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, + + /* Shutdown Notification (SHN): bits 15:14 */ + NVME_CC_SHN_SHIFT = 14, + NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT, - NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, + + /* I/O Submission Queue Entry Size (IOSQES): bits 19:16 */ + NVME_CC_IOSQES_SHIFT = 16, + NVME_CC_IOSQES_MASK = 0xf << NVME_CC_IOSQES_SHIFT, NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, + + /* I/O Completion Queue Entry Size (IOCQES): bits 23:20 */ + NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_IOCQES_MASK = 0xf << NVME_CC_IOCQES_SHIFT, NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, + + /* Controller Ready Independent of Media Enable (CRIME): bit 24 */ NVME_CC_CRIME = 1 << 24, + + /* Bits 25:31 are reserved (NVMe Base Specification rev 2.1) */ }; enum { diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index de5deb1a0118..1a2594a38199 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3134,6 +3134,7 @@ #define PCI_DEVICE_ID_INTEL_HDA_LNL_P 0xa828 #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 #define PCI_DEVICE_ID_INTEL_HDA_BMG 0xe2f7 +#define PCI_DEVICE_ID_INTEL_HDA_PTL_H 0xe328 #define PCI_DEVICE_ID_INTEL_HDA_PTL 0xe428 #define PCI_DEVICE_ID_INTEL_HDA_CML_R 0xf0c8 #define PCI_DEVICE_ID_INTEL_HDA_RKL_S 0xf1c8 diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 903ddfea8585..f3cad182d4ef 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -815,6 +815,15 @@ struct sev_data_snp_commit { #ifdef CONFIG_CRYPTO_DEV_SP_PSP /** + * sev_module_init - perform PSP SEV module initialization + * + * Returns: + * 0 if the PSP module is successfully initialized + * negative value if the PSP module initialization fails + */ +int sev_module_init(void); + +/** * sev_platform_init - perform SEV INIT command * * @args: struct sev_platform_init_args to pass in arguments diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h index 2c8bfd0f1b6b..6322d8c1c6b4 100644 --- a/include/linux/rcuref.h +++ b/include/linux/rcuref.h @@ -71,27 +71,30 @@ static inline __must_check bool rcuref_get(rcuref_t *ref) return rcuref_get_slowpath(ref); } -extern __must_check bool rcuref_put_slowpath(rcuref_t *ref); +extern __must_check bool rcuref_put_slowpath(rcuref_t *ref, unsigned int cnt); /* * Internal helper. Do not invoke directly. */ static __always_inline __must_check bool __rcuref_put(rcuref_t *ref) { + int cnt; + RCU_LOCKDEP_WARN(!rcu_read_lock_held() && preemptible(), "suspicious rcuref_put_rcusafe() usage"); /* * Unconditionally decrease the reference count. The saturation and * dead zones provide enough tolerance for this. */ - if (likely(!atomic_add_negative_release(-1, &ref->refcnt))) + cnt = atomic_sub_return_release(1, &ref->refcnt); + if (likely(cnt >= 0)) return false; /* * Handle the last reference drop and cases inside the saturation * and dead zones. */ - return rcuref_put_slowpath(ref); + return rcuref_put_slowpath(ref, cnt); } /** diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 0f2aeb37bbb0..ca1db4b92c32 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -43,6 +43,7 @@ struct kernel_clone_args { void *fn_arg; struct cgroup *cgrp; struct css_set *cset; + unsigned int kill_seq; }; /* diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 2cbe0c22a32f..0b9095a281b8 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -91,6 +91,8 @@ struct sk_psock { struct sk_psock_progs progs; #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) struct strparser strp; + u32 copied_seq; + u32 ingress_bytes; #endif struct sk_buff_head ingress_skb; struct list_head ingress_msg; diff --git a/include/linux/socket.h b/include/linux/socket.h index d18cc47e89bd..c3322eb3d686 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -392,6 +392,8 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len, + void *data); struct timespec64; struct __kernel_timespec; diff --git a/include/linux/string.h b/include/linux/string.h index 86d5d352068b..f8e21e80942f 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -414,7 +414,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * must be discoverable by the compiler. */ #define strtomem_pad(dest, src, pad) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ @@ -437,7 +438,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * must be discoverable by the compiler. */ #define strtomem(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ \ BUILD_BUG_ON(!__builtin_constant_p(_dest_len) || \ @@ -456,7 +458,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * Note that sizes of @dest and @src must be known at compile-time. */ #define memtostr(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ const size_t _src_chars = strnlen(src, _src_len); \ const size_t _copy_len = min(_dest_len - 1, _src_chars); \ @@ -481,7 +484,8 @@ void memcpy_and_pad(void *dest, size_t dest_len, const void *src, size_t count, * Note that sizes of @dest and @src must be known at compile-time. */ #define memtostr_pad(dest, src) do { \ - const size_t _dest_len = __builtin_object_size(dest, 1); \ + const size_t _dest_len = __must_be_byte_array(dest) + \ + ARRAY_SIZE(dest); \ const size_t _src_len = __builtin_object_size(src, 1); \ const size_t _src_chars = strnlen(src, _src_len); \ const size_t _copy_len = min(_dest_len - 1, _src_chars); \ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index fec1e8a1570c..eac57914dcf3 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -158,7 +158,6 @@ enum { RPC_TASK_NEED_XMIT, RPC_TASK_NEED_RECV, RPC_TASK_MSG_PIN_WAIT, - RPC_TASK_SIGNALLED, }; #define rpc_test_and_set_running(t) \ @@ -171,7 +170,7 @@ enum { #define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate) -#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate) +#define RPC_SIGNALLED(t) (READ_ONCE(task->tk_rpc_status) == -ERESTARTSYS) /* * Task priorities. diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index d9c767cf773d..9189354c568f 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -668,7 +668,7 @@ struct l2cap_conn { struct l2cap_chan *smp; struct list_head chan_l; - struct mutex chan_lock; + struct mutex lock; struct kref ref; struct list_head users; }; @@ -970,6 +970,7 @@ void l2cap_chan_del(struct l2cap_chan *chan, int err); void l2cap_send_conn_req(struct l2cap_chan *chan); struct l2cap_conn *l2cap_conn_get(struct l2cap_conn *conn); +struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *conn); void l2cap_conn_put(struct l2cap_conn *conn); int l2cap_register_user(struct l2cap_conn *conn, struct l2cap_user *user); diff --git a/include/net/gro.h b/include/net/gro.h index b9b58c1f8d19..7b548f91754b 100644 --- a/include/net/gro.h +++ b/include/net/gro.h @@ -11,6 +11,9 @@ #include <net/udp.h> #include <net/hotdata.h> +/* This should be increased if a protocol with a bigger head is added. */ +#define GRO_MAX_HEAD (MAX_HEADER + 128) + struct napi_gro_cb { union { struct { diff --git a/include/net/ip.h b/include/net/ip.h index 9f5e33e371fc..ba7b43447775 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -471,9 +471,12 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { const struct rtable *rt = dst_rtable(dst); - struct net *net = dev_net(dst->dev); - unsigned int mtu; + unsigned int mtu, res; + struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) || ip_mtu_locked(dst) || !forwarding) { @@ -497,7 +500,11 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, out: mtu = min_t(unsigned int, mtu, IP_MAX_MTU); - return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + res = mtu - lwtunnel_headroom(dst->lwtstate, mtu); + + rcu_read_unlock(); + + return res; } static inline unsigned int ip_skb_dst_mtu(struct sock *sk, diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 2d6141f28b53..f7fe796e8429 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -198,10 +198,12 @@ struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) if (netif_is_l3_slave(dev)) { struct net_device *master; + rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); if (master && master->l3mdev_ops->l3mdev_l3_out) skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, skb, proto); + rcu_read_unlock(); } return skb; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 0f5eb9db0c62..f467a66abc6b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -297,6 +297,7 @@ static inline int check_net(const struct net *net) } void net_drop_ns(void *); +void net_passive_dec(struct net *net); #else @@ -326,8 +327,18 @@ static inline int check_net(const struct net *net) } #define net_drop_ns NULL + +static inline void net_passive_dec(struct net *net) +{ + refcount_dec(&net->passive); +} #endif +static inline void net_passive_inc(struct net *net) +{ + refcount_inc(&net->passive); +} + /* Returns true if the netns initialization is completed successfully */ static inline bool net_initialized(const struct net *net) { @@ -398,7 +409,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet) #endif } -static inline struct net *read_pnet_rcu(possible_net_t *pnet) +static inline struct net *read_pnet_rcu(const possible_net_t *pnet) { #ifdef CONFIG_NET_NS return rcu_dereference(pnet->net); diff --git a/include/net/route.h b/include/net/route.h index f86775be3e29..c605fd5ec0c0 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -382,10 +382,15 @@ static inline int inet_iif(const struct sk_buff *skb) static inline int ip4_dst_hoplimit(const struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); - struct net *net = dev_net(dst->dev); - if (hoplimit == 0) + if (hoplimit == 0) { + const struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); + rcu_read_unlock(); + } return hoplimit; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d635c5b47eba..d48c657191cd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -851,7 +851,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, - __u64 bytes, __u32 packets) + __u64 bytes, __u64 packets) { u64_stats_update_begin(&bstats->syncp); u64_stats_add(&bstats->bytes, bytes); diff --git a/include/net/sock.h b/include/net/sock.h index 8036b3b79cd8..7ef728324e4e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1751,6 +1751,7 @@ static inline bool sock_allow_reclassification(const struct sock *csk) struct sock *sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void sk_free(struct sock *sk); +void sk_net_refcnt_upgrade(struct sock *sk); void sk_destruct(struct sock *sk); struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority); void sk_free_unlock_clone(struct sock *sk); diff --git a/include/net/strparser.h b/include/net/strparser.h index 41e2ce9e9e10..0a83010b3a64 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -43,6 +43,8 @@ struct strparser; struct strp_callbacks { int (*parse_msg)(struct strparser *strp, struct sk_buff *skb); void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); + int (*read_sock)(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor); int (*read_sock_done)(struct strparser *strp, int err); void (*abort_parser)(struct strparser *strp, int err); void (*lock)(struct strparser *strp); diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b2b04835688..2d08473a6dc0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -41,6 +41,7 @@ #include <net/inet_ecn.h> #include <net/dst.h> #include <net/mptcp.h> +#include <net/xfrm.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> @@ -683,6 +684,19 @@ void tcp_fin(struct sock *sk); void tcp_check_space(struct sock *sk); void tcp_sack_compress_send_ack(struct sock *sk); +static inline void tcp_cleanup_skb(struct sk_buff *skb) +{ + skb_dst_drop(skb); + secpath_reset(skb); +} + +static inline void tcp_add_receive_queue(struct sock *sk, struct sk_buff *skb) +{ + DEBUG_NET_WARN_ON_ONCE(skb_dst(skb)); + DEBUG_NET_WARN_ON_ONCE(secpath_exists(skb)); + __skb_queue_tail(&sk->sk_receive_queue, skb); +} + /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) @@ -729,6 +743,9 @@ void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); +int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack, + u32 *copied_seq); int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor); struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off); void tcp_read_done(struct sock *sk, size_t len); @@ -2599,6 +2616,11 @@ struct sk_psock; #ifdef CONFIG_BPF_SYSCALL int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); +#ifdef CONFIG_BPF_STREAM_PARSER +struct strparser; +int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor); +#endif /* CONFIG_BPF_STREAM_PARSER */ #endif /* CONFIG_BPF_SYSCALL */ #ifdef CONFIG_INET diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index 3dc7a1551ac3..5d653a3491d0 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -12,6 +12,7 @@ #include <linux/firmware/cirrus/cs_dsp.h> #include <linux/regulator/consumer.h> #include <linux/regmap.h> +#include <linux/spi/spi.h> #include <sound/cs-amp-lib.h> #define CS35L56_DEVID 0x0000000 @@ -61,6 +62,7 @@ #define CS35L56_IRQ1_MASK_8 0x000E0AC #define CS35L56_IRQ1_MASK_18 0x000E0D4 #define CS35L56_IRQ1_MASK_20 0x000E0DC +#define CS35L56_DSP_MBOX_1_RAW 0x0011000 #define CS35L56_DSP_VIRTUAL1_MBOX_1 0x0011020 #define CS35L56_DSP_VIRTUAL1_MBOX_2 0x0011024 #define CS35L56_DSP_VIRTUAL1_MBOX_3 0x0011028 @@ -224,6 +226,7 @@ #define CS35L56_HALO_STATE_SHUTDOWN 1 #define CS35L56_HALO_STATE_BOOT_DONE 2 +#define CS35L56_MBOX_CMD_PING 0x0A000000 #define CS35L56_MBOX_CMD_AUDIO_PLAY 0x0B000001 #define CS35L56_MBOX_CMD_AUDIO_PAUSE 0x0B000002 #define CS35L56_MBOX_CMD_AUDIO_REINIT 0x0B000003 @@ -254,6 +257,16 @@ #define CS35L56_NUM_BULK_SUPPLIES 3 #define CS35L56_NUM_DSP_REGIONS 5 +/* Additional margin for SYSTEM_RESET to control port ready on SPI */ +#define CS35L56_SPI_RESET_TO_PORT_READY_US (CS35L56_CONTROL_PORT_READY_US + 2500) + +struct cs35l56_spi_payload { + __be32 addr; + __be16 pad; + __be32 value; +} __packed; +static_assert(sizeof(struct cs35l56_spi_payload) == 10); + struct cs35l56_base { struct device *dev; struct regmap *regmap; @@ -269,6 +282,7 @@ struct cs35l56_base { s8 cal_index; struct cirrus_amp_cal_data cal_data; struct gpio_desc *reset_gpio; + struct cs35l56_spi_payload *spi_payload_buf; }; static inline bool cs35l56_is_otp_register(unsigned int reg) @@ -276,6 +290,23 @@ static inline bool cs35l56_is_otp_register(unsigned int reg) return (reg >> 16) == 3; } +static inline int cs35l56_init_config_for_spi(struct cs35l56_base *cs35l56, + struct spi_device *spi) +{ + cs35l56->spi_payload_buf = devm_kzalloc(&spi->dev, + sizeof(*cs35l56->spi_payload_buf), + GFP_KERNEL | GFP_DMA); + if (!cs35l56->spi_payload_buf) + return -ENOMEM; + + return 0; +} + +static inline bool cs35l56_is_spi(struct cs35l56_base *cs35l56) +{ + return IS_ENABLED(CONFIG_SPI_MASTER) && !!cs35l56->spi_payload_buf; +} + extern const struct regmap_config cs35l56_regmap_i2c; extern const struct regmap_config cs35l56_regmap_spi; extern const struct regmap_config cs35l56_regmap_sdw; diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index b0db89058c91..958a2460330c 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -174,6 +174,7 @@ enum yfs_cm_operation { EM(afs_cell_trace_get_queue_dns, "GET q-dns ") \ EM(afs_cell_trace_get_queue_manage, "GET q-mng ") \ EM(afs_cell_trace_get_queue_new, "GET q-new ") \ + EM(afs_cell_trace_get_server, "GET server") \ EM(afs_cell_trace_get_vol, "GET vol ") \ EM(afs_cell_trace_insert, "INSERT ") \ EM(afs_cell_trace_manage, "MANAGE ") \ @@ -182,6 +183,7 @@ enum yfs_cm_operation { EM(afs_cell_trace_put_destroy, "PUT destry") \ EM(afs_cell_trace_put_queue_work, "PUT q-work") \ EM(afs_cell_trace_put_queue_fail, "PUT q-fail") \ + EM(afs_cell_trace_put_server, "PUT server") \ EM(afs_cell_trace_put_vol, "PUT vol ") \ EM(afs_cell_trace_see_source, "SEE source") \ EM(afs_cell_trace_see_ws, "SEE ws ") \ diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 6e699cadcb29..f880835f7695 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -99,7 +99,7 @@ EM(netfs_sreq_trace_limited, "LIMIT") \ EM(netfs_sreq_trace_need_clear, "N-CLR") \ EM(netfs_sreq_trace_partial_read, "PARTR") \ - EM(netfs_sreq_trace_need_retry, "NRTRY") \ + EM(netfs_sreq_trace_need_retry, "ND-RT") \ EM(netfs_sreq_trace_prepare, "PREP ") \ EM(netfs_sreq_trace_prep_failed, "PRPFL") \ EM(netfs_sreq_trace_progress, "PRGRS") \ @@ -108,7 +108,9 @@ EM(netfs_sreq_trace_short, "SHORT") \ EM(netfs_sreq_trace_split, "SPLIT") \ EM(netfs_sreq_trace_submit, "SUBMT") \ + EM(netfs_sreq_trace_superfluous, "SPRFL") \ EM(netfs_sreq_trace_terminated, "TERM ") \ + EM(netfs_sreq_trace_wait_for, "_WAIT") \ EM(netfs_sreq_trace_write, "WRITE") \ EM(netfs_sreq_trace_write_skip, "SKIP ") \ E_(netfs_sreq_trace_write_term, "WTERM") diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 2f119d18a061..cad50d91077e 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -219,6 +219,7 @@ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \ + EM(rxrpc_conn_get_poke_secured, "GET secured ") \ EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ EM(rxrpc_conn_new_client, "NEW client ") \ diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index b13dc275ef4a..851841336ee6 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -360,8 +360,7 @@ TRACE_EVENT(rpc_request, { (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \ { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \ { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \ - { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }, \ - { (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" }) + { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }) DECLARE_EVENT_CLASS(rpc_task_running, diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index efe5de6ce208..aaa4f3bc688b 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -411,13 +411,20 @@ struct drm_amdgpu_gem_userptr { /* GFX12 and later: */ #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0 #define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7 -/* These are DCC recompression setting for memory management: */ +/* These are DCC recompression settings for memory management: */ #define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3 #define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */ #define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5 #define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */ #define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8 #define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */ +/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata + * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */ +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1 +/* bit gap */ +#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63 +#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1 /* Set/Get helpers for tiling flags. */ #define AMDGPU_TILING_SET(field, value) \ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d1089b88efc7..9b18c4cfe56f 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -682,6 +682,7 @@ enum ethtool_link_ext_substate_module { * @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics * @ETH_SS_STATS_RMON: names of RMON statistics * @ETH_SS_STATS_PHY: names of PHY(dev) statistics + * @ETH_SS_TS_FLAGS: hardware timestamping flags * * @ETH_SS_COUNT: number of defined string sets */ @@ -708,6 +709,7 @@ enum ethtool_stringset { ETH_SS_STATS_ETH_CTRL, ETH_SS_STATS_RMON, ETH_SS_STATS_PHY, + ETH_SS_TS_FLAGS, /* add new constants above here */ ETH_SS_COUNT diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index e11c82638527..050fa8eb2e8f 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -380,7 +380,7 @@ enum io_uring_op { * result will be the number of buffers send, with * the starting buffer ID in cqe->flags as per * usual for provided buffer usage. The buffers - * will be contigious from the starting buffer ID. + * will be contiguous from the starting buffer ID. */ #define IORING_RECVSEND_POLL_FIRST (1U << 0) #define IORING_RECV_MULTISHOT (1U << 1) diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 33745642f787..e1d2c27533b4 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -268,7 +268,9 @@ struct landlock_net_port_attr { * ~~~~~~~~~~~~~~~~ * * These flags enable to restrict a sandboxed process to a set of network - * actions. This is supported since the Landlock ABI version 4. + * actions. + * + * This is supported since Landlock ABI version 4. * * The following access rights apply to TCP port numbers: * @@ -291,11 +293,13 @@ struct landlock_net_port_attr { * Setting a flag for a ruleset will isolate the Landlock domain to forbid * connections to resources outside the domain. * + * This is supported since Landlock ABI version 6. + * * Scopes: * * - %LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET: Restrict a sandboxed process from * connecting to an abstract UNIX socket created by a process outside the - * related Landlock domain (e.g. a parent domain or a non-sandboxed process). + * related Landlock domain (e.g., a parent domain or a non-sandboxed process). * - %LANDLOCK_SCOPE_SIGNAL: Restrict a sandboxed process from sending a signal * to another process outside the domain. */ diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 934e20ef7f79..95762232e018 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 14 +#define TASKSTATS_VERSION 15 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ diff --git a/include/uapi/linux/thermal.h b/include/uapi/linux/thermal.h index 349718c271eb..46a2633d33aa 100644 --- a/include/uapi/linux/thermal.h +++ b/include/uapi/linux/thermal.h @@ -30,7 +30,6 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_TZ, THERMAL_GENL_ATTR_TZ_ID, THERMAL_GENL_ATTR_TZ_TEMP, - THERMAL_GENL_ATTR_TZ_PREV_TEMP, THERMAL_GENL_ATTR_TZ_TRIP, THERMAL_GENL_ATTR_TZ_TRIP_ID, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, @@ -54,6 +53,7 @@ enum thermal_genl_attr { THERMAL_GENL_ATTR_THRESHOLD, THERMAL_GENL_ATTR_THRESHOLD_TEMP, THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, + THERMAL_GENL_ATTR_TZ_PREV_TEMP, __THERMAL_GENL_ATTR_MAX, }; #define THERMAL_GENL_ATTR_MAX (__THERMAL_GENL_ATTR_MAX - 1) diff --git a/include/uapi/linux/virtio_snd.h b/include/uapi/linux/virtio_snd.h index 5f4100c2cf04..a4cfb9f6561a 100644 --- a/include/uapi/linux/virtio_snd.h +++ b/include/uapi/linux/virtio_snd.h @@ -25,7 +25,7 @@ struct virtio_snd_config { __le32 streams; /* # of available channel maps */ __le32 chmaps; - /* # of available control elements */ + /* # of available control elements (if VIRTIO_SND_F_CTLS) */ __le32 controls; }; diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index 89672ad8c3bb..f151feb0ca8c 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -385,8 +385,8 @@ enum { /* Possible values for dExtendedUFSFeaturesSupport */ enum { - UFS_DEV_LOW_TEMP_NOTIF = BIT(4), - UFS_DEV_HIGH_TEMP_NOTIF = BIT(5), + UFS_DEV_HIGH_TEMP_NOTIF = BIT(4), + UFS_DEV_LOW_TEMP_NOTIF = BIT(5), UFS_DEV_EXT_TEMP_NOTIF = BIT(6), UFS_DEV_HPB_SUPPORT = BIT(7), UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 650ff238cd74..8bf31e6ca4e5 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1309,7 +1309,6 @@ static inline void ufshcd_rmwl(struct ufs_hba *hba, u32 mask, u32 val, u32 reg) void ufshcd_enable_irq(struct ufs_hba *hba); void ufshcd_disable_irq(struct ufs_hba *hba); int ufshcd_alloc_host(struct device *, struct ufs_hba **); -void ufshcd_dealloc_host(struct ufs_hba *); int ufshcd_hba_enable(struct ufs_hba *hba); int ufshcd_init(struct ufs_hba *, void __iomem *, unsigned int); int ufshcd_link_recovery(struct ufs_hba *hba); diff --git a/io_uring/futex.c b/io_uring/futex.c index 3159a2b7eeca..43e2143255f5 100644 --- a/io_uring/futex.c +++ b/io_uring/futex.c @@ -338,7 +338,7 @@ int io_futex_wait(struct io_kiocb *req, unsigned int issue_flags) hlist_add_head(&req->hash_node, &ctx->futex_list); io_ring_submit_unlock(ctx, issue_flags); - futex_queue(&ifd->q, hb); + futex_queue(&ifd->q, hb, NULL); return IOU_ISSUE_SKIP_COMPLETE; } diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 5d0928f37471..91019b4d0308 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -64,7 +64,7 @@ struct io_worker { union { struct rcu_head rcu; - struct work_struct work; + struct delayed_work work; }; }; @@ -770,6 +770,18 @@ static inline bool io_should_retry_thread(struct io_worker *worker, long err) } } +static void queue_create_worker_retry(struct io_worker *worker) +{ + /* + * We only bother retrying because there's a chance that the + * failure to create a worker is due to some temporary condition + * in the forking task (e.g. outstanding signal); give the task + * some time to clear that condition. + */ + schedule_delayed_work(&worker->work, + msecs_to_jiffies(worker->init_retries * 5)); +} + static void create_worker_cont(struct callback_head *cb) { struct io_worker *worker; @@ -809,12 +821,13 @@ static void create_worker_cont(struct callback_head *cb) /* re-create attempts grab a new worker ref, drop the existing one */ io_worker_release(worker); - schedule_work(&worker->work); + queue_create_worker_retry(worker); } static void io_workqueue_create(struct work_struct *work) { - struct io_worker *worker = container_of(work, struct io_worker, work); + struct io_worker *worker = container_of(work, struct io_worker, + work.work); struct io_wq_acct *acct = io_wq_get_acct(worker); if (!io_queue_worker_create(worker, acct, create_worker_cont)) @@ -855,8 +868,8 @@ fail: kfree(worker); goto fail; } else { - INIT_WORK(&worker->work, io_workqueue_create); - schedule_work(&worker->work); + INIT_DELAYED_WORK(&worker->work, io_workqueue_create); + queue_create_worker_retry(worker); } return true; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index ceacf6230e34..f7acae5f7e1d 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2045,6 +2045,8 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->opcode = 0; return io_init_fail_req(req, -EINVAL); } + opcode = array_index_nospec(opcode, IORING_OP_LAST); + def = &io_issue_defs[opcode]; if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) { /* enforce forwards compatibility on users */ diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 04bf493eecae..8e72de7712ac 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -415,6 +415,13 @@ void io_destroy_buffers(struct io_ring_ctx *ctx) } } +static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) +{ + scoped_guard(mutex, &ctx->mmap_lock) + WARN_ON_ONCE(xa_erase(&ctx->io_bl_xa, bl->bgid) != bl); + io_put_bl(ctx, bl); +} + int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf); @@ -636,12 +643,13 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) /* if mapped buffer ring OR classic exists, don't allow */ if (bl->flags & IOBL_BUF_RING || !list_empty(&bl->buf_list)) return -EEXIST; - } else { - free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL); - if (!bl) - return -ENOMEM; + io_destroy_bl(ctx, bl); } + free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL); + if (!bl) + return -ENOMEM; + mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT; ring_size = flex_array_size(br, bufs, reg.ring_entries); diff --git a/io_uring/net.c b/io_uring/net.c index 17852a6616ff..5d0b56ff50ee 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -322,7 +322,9 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req, if (unlikely(ret)) return ret; - return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); + ret = __get_compat_msghdr(&iomsg->msg, &cmsg, NULL); + sr->msg_control = iomsg->msg.msg_control_user; + return ret; } #endif diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 190f7ee45de9..89ea0135a1a0 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -4,12 +4,6 @@ #include <linux/lockdep.h> -#define IO_NODE_ALLOC_CACHE_MAX 32 - -#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) -#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT) -#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1) - enum { IORING_RSRC_FILE = 0, IORING_RSRC_BUFFER = 1, diff --git a/io_uring/rw.c b/io_uring/rw.c index 7aa1e4c9f64a..9edc6baebd01 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -23,6 +23,9 @@ #include "poll.h" #include "rw.h" +static void io_complete_rw(struct kiocb *kiocb, long res); +static void io_complete_rw_iopoll(struct kiocb *kiocb, long res); + struct io_rw { /* NOTE: kiocb has the file as the first member, so don't do it here */ struct kiocb kiocb; @@ -289,6 +292,11 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, rw->kiocb.dio_complete = NULL; rw->kiocb.ki_flags = 0; + if (req->ctx->flags & IORING_SETUP_IOPOLL) + rw->kiocb.ki_complete = io_complete_rw_iopoll; + else + rw->kiocb.ki_complete = io_complete_rw; + rw->addr = READ_ONCE(sqe->addr); rw->len = READ_ONCE(sqe->len); rw->flags = READ_ONCE(sqe->rw_flags); @@ -563,8 +571,10 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) smp_store_release(&req->iopoll_completed, 1); } -static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) +static inline void io_rw_done(struct io_kiocb *req, ssize_t ret) { + struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw); + /* IO was queued async, completion will happen later */ if (ret == -EIOCBQUEUED) return; @@ -586,8 +596,10 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) } } - INDIRECT_CALL_2(kiocb->ki_complete, io_complete_rw_iopoll, - io_complete_rw, kiocb, ret); + if (req->ctx->flags & IORING_SETUP_IOPOLL) + io_complete_rw_iopoll(&rw->kiocb, ret); + else + io_complete_rw(&rw->kiocb, ret); } static int kiocb_done(struct io_kiocb *req, ssize_t ret, @@ -598,7 +610,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, if (ret >= 0 && req->flags & REQ_F_CUR_POS) req->file->f_pos = rw->kiocb.ki_pos; - if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) { + if (ret >= 0 && !(req->ctx->flags & IORING_SETUP_IOPOLL)) { __io_complete_rw_common(req, ret); /* * Safe to call io_end from here as we're inline @@ -609,7 +621,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret, io_req_rw_cleanup(req, issue_flags); return IOU_OK; } else { - io_rw_done(&rw->kiocb, ret); + io_rw_done(req, ret); } return IOU_ISSUE_SKIP_COMPLETE; @@ -813,10 +825,8 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) if (ctx->flags & IORING_SETUP_IOPOLL) { if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) return -EOPNOTSUPP; - kiocb->private = NULL; kiocb->ki_flags |= IOCB_HIPRI; - kiocb->ki_complete = io_complete_rw_iopoll; req->iopoll_completed = 0; if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) { /* make sure every req only blocks once*/ @@ -826,7 +836,6 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) } else { if (kiocb->ki_flags & IOCB_HIPRI) return -EINVAL; - kiocb->ki_complete = io_complete_rw; } if (req->flags & REQ_F_HAS_METADATA) { @@ -904,7 +913,8 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags) } else if (ret == -EIOCBQUEUED) { return IOU_ISSUE_SKIP_COMPLETE; } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock || - (req->flags & REQ_F_NOWAIT) || !need_complete_io(req)) { + (req->flags & REQ_F_NOWAIT) || !need_complete_io(req) || + (issue_flags & IO_URING_F_MULTISHOT)) { /* read all, failed, already did sync or don't want to retry */ goto done; } @@ -977,6 +987,8 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags) if (!io_file_can_poll(req)) return -EBADFD; + /* make it sync, multishot doesn't support async execution */ + rw->kiocb.ki_complete = NULL; ret = __io_read(req, issue_flags); /* diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 1f6a82128b47..e6701b7aa147 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -54,9 +54,6 @@ bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx, continue; if (cmd->flags & IORING_URING_CMD_CANCELABLE) { - /* ->sqe isn't available if no async data */ - if (!req_has_async_data(req)) - cmd->sqe = NULL; file->f_op->uring_cmd(cmd, IO_URING_F_CANCEL | IO_URING_F_COMPLETE_DEFER); ret = true; @@ -179,12 +176,13 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req, return -ENOMEM; cache->op_data = NULL; - if (!(req->flags & REQ_F_FORCE_ASYNC)) { - /* defer memcpy until we need it */ - ioucmd->sqe = sqe; - return 0; - } - + /* + * Unconditionally cache the SQE for now - this is only needed for + * requests that go async, but prep handlers must ensure that any + * sqe data is stable beyond prep. Since uring_cmd is special in + * that it doesn't read in per-op data, play it safe and ensure that + * any SQE data is stable beyond prep. This can later get relaxed. + */ memcpy(cache->sqes, sqe, uring_sqe_size(req->ctx)); ioucmd->sqe = cache->sqes; return 0; @@ -249,16 +247,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) } ret = file->f_op->uring_cmd(ioucmd, issue_flags); - if (ret == -EAGAIN) { - struct io_uring_cmd_data *cache = req->async_data; - - if (ioucmd->sqe != (void *) cache) - memcpy(cache->sqes, ioucmd->sqe, uring_sqe_size(req->ctx)); - return -EAGAIN; - } else if (ret == -EIOCBQUEUED) { - return -EIOCBQUEUED; - } - + if (ret == -EAGAIN || ret == -EIOCBQUEUED) + return ret; if (ret < 0) req_set_fail(req); io_req_uring_cleanup(req, issue_flags); diff --git a/io_uring/waitid.c b/io_uring/waitid.c index 853e97a7b0ec..15a7daf3ff4f 100644 --- a/io_uring/waitid.c +++ b/io_uring/waitid.c @@ -118,7 +118,6 @@ static int io_waitid_finish(struct io_kiocb *req, int ret) static void io_waitid_complete(struct io_kiocb *req, int ret) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); - struct io_tw_state ts = {}; /* anyone completing better be holding a reference */ WARN_ON_ONCE(!(atomic_read(&iw->refs) & IO_WAITID_REF_MASK)); @@ -131,7 +130,6 @@ static void io_waitid_complete(struct io_kiocb *req, int ret) if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); - io_req_task_complete(req, &ts); } static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) @@ -153,6 +151,7 @@ static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req) list_del_init(&iwa->wo.child_wait.entry); spin_unlock_irq(&iw->head->lock); io_waitid_complete(req, -ECANCELED); + io_req_queue_tw_complete(req, -ECANCELED); return true; } @@ -258,6 +257,7 @@ static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts) } io_waitid_complete(req, ret); + io_req_task_complete(req, ts); } static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode, @@ -285,10 +285,16 @@ static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode, int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + struct io_waitid_async *iwa; if (sqe->addr || sqe->buf_index || sqe->addr3 || sqe->waitid_flags) return -EINVAL; + iwa = io_uring_alloc_async_data(NULL, req); + if (!unlikely(iwa)) + return -ENOMEM; + iwa->req = req; + iw->which = READ_ONCE(sqe->len); iw->upid = READ_ONCE(sqe->fd); iw->options = READ_ONCE(sqe->file_index); @@ -299,16 +305,10 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) int io_waitid(struct io_kiocb *req, unsigned int issue_flags) { struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid); + struct io_waitid_async *iwa = req->async_data; struct io_ring_ctx *ctx = req->ctx; - struct io_waitid_async *iwa; int ret; - iwa = io_uring_alloc_async_data(NULL, req); - if (!iwa) - return -ENOMEM; - - iwa->req = req; - ret = kernel_waitid_prepare(&iwa->wo, iw->which, iw->upid, &iw->info, iw->options, NULL); if (ret) diff --git a/kernel/acct.c b/kernel/acct.c index 31222e8cd534..6520baa13669 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -103,48 +103,50 @@ struct bsd_acct_struct { atomic_long_t count; struct rcu_head rcu; struct mutex lock; - int active; + bool active; + bool check_space; unsigned long needcheck; struct file *file; struct pid_namespace *ns; struct work_struct work; struct completion done; + acct_t ac; }; -static void do_acct_process(struct bsd_acct_struct *acct); +static void fill_ac(struct bsd_acct_struct *acct); +static void acct_write_process(struct bsd_acct_struct *acct); /* * Check the amount of free space and suspend/resume accordingly. */ -static int check_free_space(struct bsd_acct_struct *acct) +static bool check_free_space(struct bsd_acct_struct *acct) { struct kstatfs sbuf; - if (time_is_after_jiffies(acct->needcheck)) - goto out; + if (!acct->check_space) + return acct->active; /* May block */ if (vfs_statfs(&acct->file->f_path, &sbuf)) - goto out; + return acct->active; if (acct->active) { u64 suspend = sbuf.f_blocks * SUSPEND; do_div(suspend, 100); if (sbuf.f_bavail <= suspend) { - acct->active = 0; + acct->active = false; pr_info("Process accounting paused\n"); } } else { u64 resume = sbuf.f_blocks * RESUME; do_div(resume, 100); if (sbuf.f_bavail >= resume) { - acct->active = 1; + acct->active = true; pr_info("Process accounting resumed\n"); } } acct->needcheck = jiffies + ACCT_TIMEOUT*HZ; -out: return acct->active; } @@ -189,7 +191,11 @@ static void acct_pin_kill(struct fs_pin *pin) { struct bsd_acct_struct *acct = to_acct(pin); mutex_lock(&acct->lock); - do_acct_process(acct); + /* + * Fill the accounting struct with the exiting task's info + * before punting to the workqueue. + */ + fill_ac(acct); schedule_work(&acct->work); wait_for_completion(&acct->done); cmpxchg(&acct->ns->bacct, pin, NULL); @@ -202,6 +208,9 @@ static void close_work(struct work_struct *work) { struct bsd_acct_struct *acct = container_of(work, struct bsd_acct_struct, work); struct file *file = acct->file; + + /* We were fired by acct_pin_kill() which holds acct->lock. */ + acct_write_process(acct); if (file->f_op->flush) file->f_op->flush(file, NULL); __fput_sync(file); @@ -234,6 +243,20 @@ static int acct_on(struct filename *pathname) return -EACCES; } + /* Exclude kernel kernel internal filesystems. */ + if (file_inode(file)->i_sb->s_flags & (SB_NOUSER | SB_KERNMOUNT)) { + kfree(acct); + filp_close(file, NULL); + return -EINVAL; + } + + /* Exclude procfs and sysfs. */ + if (file_inode(file)->i_sb->s_iflags & SB_I_USERNS_VISIBLE) { + kfree(acct); + filp_close(file, NULL); + return -EINVAL; + } + if (!(file->f_mode & FMODE_CAN_WRITE)) { kfree(acct); filp_close(file, NULL); @@ -430,13 +453,27 @@ static u32 encode_float(u64 value) * do_exit() or when switching to a different output file. */ -static void fill_ac(acct_t *ac) +static void fill_ac(struct bsd_acct_struct *acct) { struct pacct_struct *pacct = ¤t->signal->pacct; + struct file *file = acct->file; + acct_t *ac = &acct->ac; u64 elapsed, run_time; time64_t btime; struct tty_struct *tty; + lockdep_assert_held(&acct->lock); + + if (time_is_after_jiffies(acct->needcheck)) { + acct->check_space = false; + + /* Don't fill in @ac if nothing will be written. */ + if (!acct->active) + return; + } else { + acct->check_space = true; + } + /* * Fill the accounting struct with the needed info as recorded * by the different kernel functions. @@ -484,64 +521,61 @@ static void fill_ac(acct_t *ac) ac->ac_majflt = encode_comp_t(pacct->ac_majflt); ac->ac_exitcode = pacct->ac_exitcode; spin_unlock_irq(¤t->sighand->siglock); -} -/* - * do_acct_process does all actual work. Caller holds the reference to file. - */ -static void do_acct_process(struct bsd_acct_struct *acct) -{ - acct_t ac; - unsigned long flim; - const struct cred *orig_cred; - struct file *file = acct->file; - /* - * Accounting records are not subject to resource limits. - */ - flim = rlimit(RLIMIT_FSIZE); - current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; - /* Perform file operations on behalf of whoever enabled accounting */ - orig_cred = override_creds(file->f_cred); - - /* - * First check to see if there is enough free_space to continue - * the process accounting system. - */ - if (!check_free_space(acct)) - goto out; - - fill_ac(&ac); /* we really need to bite the bullet and change layout */ - ac.ac_uid = from_kuid_munged(file->f_cred->user_ns, orig_cred->uid); - ac.ac_gid = from_kgid_munged(file->f_cred->user_ns, orig_cred->gid); + ac->ac_uid = from_kuid_munged(file->f_cred->user_ns, current_uid()); + ac->ac_gid = from_kgid_munged(file->f_cred->user_ns, current_gid()); #if ACCT_VERSION == 1 || ACCT_VERSION == 2 /* backward-compatible 16 bit fields */ - ac.ac_uid16 = ac.ac_uid; - ac.ac_gid16 = ac.ac_gid; + ac->ac_uid16 = ac->ac_uid; + ac->ac_gid16 = ac->ac_gid; #elif ACCT_VERSION == 3 { struct pid_namespace *ns = acct->ns; - ac.ac_pid = task_tgid_nr_ns(current, ns); + ac->ac_pid = task_tgid_nr_ns(current, ns); rcu_read_lock(); - ac.ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), - ns); + ac->ac_ppid = task_tgid_nr_ns(rcu_dereference(current->real_parent), ns); rcu_read_unlock(); } #endif +} + +static void acct_write_process(struct bsd_acct_struct *acct) +{ + struct file *file = acct->file; + const struct cred *cred; + acct_t *ac = &acct->ac; + + /* Perform file operations on behalf of whoever enabled accounting */ + cred = override_creds(file->f_cred); + /* - * Get freeze protection. If the fs is frozen, just skip the write - * as we could deadlock the system otherwise. + * First check to see if there is enough free_space to continue + * the process accounting system. Then get freeze protection. If + * the fs is frozen, just skip the write as we could deadlock + * the system otherwise. */ - if (file_start_write_trylock(file)) { + if (check_free_space(acct) && file_start_write_trylock(file)) { /* it's been opened O_APPEND, so position is irrelevant */ loff_t pos = 0; - __kernel_write(file, &ac, sizeof(acct_t), &pos); + __kernel_write(file, ac, sizeof(acct_t), &pos); file_end_write(file); } -out: + + revert_creds(cred); +} + +static void do_acct_process(struct bsd_acct_struct *acct) +{ + unsigned long flim; + + /* Accounting records are not subject to resource limits. */ + flim = rlimit(RLIMIT_FSIZE); + current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; + fill_ac(acct); + acct_write_process(acct); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; - revert_creds(orig_cred); } /** diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 870aeb51d70a..095a9554e1de 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -39,7 +39,7 @@ */ /* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */ -#define GUARD_SZ (1ull << sizeof_field(struct bpf_insn, off) * 8) +#define GUARD_SZ round_up(1ull << sizeof_field(struct bpf_insn, off) * 8, PAGE_SIZE << 1) #define KERN_VM_SZ (SZ_4G + GUARD_SZ) struct bpf_arena { diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index d5dc65bb1755..54ff2a85d4c0 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -153,7 +153,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) static void cgroup_storage_map_free(struct bpf_map *map) { - bpf_local_storage_map_free(map, &cgroup_cache, NULL); + bpf_local_storage_map_free(map, &cgroup_cache, &bpf_cgrp_storage_busy); } /* *gfp_flags* is a hidden argument provided by the verifier */ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 9de6acddd479..c3223e0db2f5 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6507,6 +6507,8 @@ static const struct bpf_raw_tp_null_args raw_tp_null_args[] = { /* rxrpc */ { "rxrpc_recvdata", 0x1 }, { "rxrpc_resend", 0x10 }, + /* skb */ + {"kfree_skb", 0x1000}, /* sunrpc */ { "xs_stream_read_data", 0x1 }, /* ... from xprt_cong_event event class */ diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index e1cfe890e0be..1499d8caa9a3 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -268,8 +268,6 @@ static int ringbuf_map_mmap_kern(struct bpf_map *map, struct vm_area_struct *vma /* allow writable mapping for the consumer_pos only */ if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE) return -EPERM; - } else { - vm_flags_clear(vma, VM_MAYWRITE); } /* remap_vmalloc_range() checks size and offset constraints */ return remap_vmalloc_range(vma, rb_map->rb, @@ -289,8 +287,6 @@ static int ringbuf_map_mmap_user(struct bpf_map *map, struct vm_area_struct *vma * position, and the ring buffer data itself. */ return -EPERM; - } else { - vm_flags_clear(vma, VM_MAYWRITE); } /* remap_vmalloc_range() checks size and offset constraints */ return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c420edbfb7c8..e1e42e918ba7 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1035,7 +1035,7 @@ static const struct vm_operations_struct bpf_map_default_vmops = { static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) { struct bpf_map *map = filp->private_data; - int err; + int err = 0; if (!map->ops->map_mmap || !IS_ERR_OR_NULL(map->record)) return -ENOTSUPP; @@ -1059,24 +1059,33 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) err = -EACCES; goto out; } + bpf_map_write_active_inc(map); } +out: + mutex_unlock(&map->freeze_mutex); + if (err) + return err; /* set default open/close callbacks */ vma->vm_ops = &bpf_map_default_vmops; vma->vm_private_data = map; vm_flags_clear(vma, VM_MAYEXEC); + /* If mapping is read-only, then disallow potentially re-mapping with + * PROT_WRITE by dropping VM_MAYWRITE flag. This VM_MAYWRITE clearing + * means that as far as BPF map's memory-mapped VMAs are concerned, + * VM_WRITE and VM_MAYWRITE and equivalent, if one of them is set, + * both should be set, so we can forget about VM_MAYWRITE and always + * check just VM_WRITE + */ if (!(vma->vm_flags & VM_WRITE)) - /* disallow re-mapping with PROT_WRITE */ vm_flags_clear(vma, VM_MAYWRITE); err = map->ops->map_mmap(map, vma); - if (err) - goto out; + if (err) { + if (vma->vm_flags & VM_WRITE) + bpf_map_write_active_dec(map); + } - if (vma->vm_flags & VM_MAYWRITE) - bpf_map_write_active_inc(map); -out: - mutex_unlock(&map->freeze_mutex); return err; } @@ -1968,8 +1977,6 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file, return err; } -#define MAP_LOOKUP_RETRIES 3 - int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, union bpf_attr __user *uattr) @@ -1979,8 +1986,8 @@ int generic_map_lookup_batch(struct bpf_map *map, void __user *values = u64_to_user_ptr(attr->batch.values); void __user *keys = u64_to_user_ptr(attr->batch.keys); void *buf, *buf_prevkey, *prev_key, *key, *value; - int err, retry = MAP_LOOKUP_RETRIES; u32 value_size, cp, max_count; + int err; if (attr->batch.elem_flags & ~BPF_F_LOCK) return -EINVAL; @@ -2026,14 +2033,8 @@ int generic_map_lookup_batch(struct bpf_map *map, err = bpf_map_copy_value(map, key, value, attr->batch.elem_flags); - if (err == -ENOENT) { - if (retry) { - retry--; - continue; - } - err = -EINTR; - break; - } + if (err == -ENOENT) + goto next_key; if (err) goto free_buf; @@ -2048,12 +2049,12 @@ int generic_map_lookup_batch(struct bpf_map *map, goto free_buf; } + cp++; +next_key: if (!prev_key) prev_key = buf_prevkey; swap(prev_key, key); - retry = MAP_LOOKUP_RETRIES; - cp++; cond_resched(); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9971c03adfd5..60611df77957 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1501,6 +1501,8 @@ static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum r struct bpf_reference_state *s; s = acquire_reference_state(env, insn_idx); + if (!s) + return -ENOMEM; s->type = type; s->id = id; s->ptr = ptr; @@ -9149,10 +9151,11 @@ static int check_reg_const_str(struct bpf_verifier_env *env, return 0; } -/* Returns constant key value if possible, else negative error */ -static s64 get_constant_map_key(struct bpf_verifier_env *env, +/* Returns constant key value in `value` if possible, else negative error */ +static int get_constant_map_key(struct bpf_verifier_env *env, struct bpf_reg_state *key, - u32 key_size) + u32 key_size, + s64 *value) { struct bpf_func_state *state = func(env, key); struct bpf_reg_state *reg; @@ -9179,8 +9182,10 @@ static s64 get_constant_map_key(struct bpf_verifier_env *env, /* First handle precisely tracked STACK_ZERO */ for (i = off; i >= 0 && stype[i] == STACK_ZERO; i--) zero_size++; - if (zero_size >= key_size) + if (zero_size >= key_size) { + *value = 0; return 0; + } /* Check that stack contains a scalar spill of expected size */ if (!is_spilled_scalar_reg(&state->stack[spi])) @@ -9203,9 +9208,12 @@ static s64 get_constant_map_key(struct bpf_verifier_env *env, if (err < 0) return err; - return reg->var_off.value; + *value = reg->var_off.value; + return 0; } +static bool can_elide_value_nullness(enum bpf_map_type type); + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn, @@ -9354,9 +9362,16 @@ skip_type_check: err = check_helper_mem_access(env, regno, key_size, BPF_READ, false, NULL); if (err) return err; - meta->const_map_key = get_constant_map_key(env, reg, key_size); - if (meta->const_map_key < 0 && meta->const_map_key != -EOPNOTSUPP) - return meta->const_map_key; + if (can_elide_value_nullness(meta->map_ptr->map_type)) { + err = get_constant_map_key(env, reg, key_size, &meta->const_map_key); + if (err < 0) { + meta->const_map_key = -1; + if (err == -EOPNOTSUPP) + err = 0; + else + return err; + } + } break; case ARG_PTR_TO_MAP_VALUE: if (type_may_be_null(arg_type) && register_is_null(reg)) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d9061bd55436..afc665b7b1fe 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4013,7 +4013,7 @@ static void __cgroup_kill(struct cgroup *cgrp) lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - set_bit(CGRP_KILL, &cgrp->flags); + cgrp->kill_seq++; spin_unlock_irq(&css_set_lock); css_task_iter_start(&cgrp->self, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED, &it); @@ -4029,10 +4029,6 @@ static void __cgroup_kill(struct cgroup *cgrp) send_sig(SIGKILL, task, 0); } css_task_iter_end(&it); - - spin_lock_irq(&css_set_lock); - clear_bit(CGRP_KILL, &cgrp->flags); - spin_unlock_irq(&css_set_lock); } static void cgroup_kill(struct cgroup *cgrp) @@ -6488,6 +6484,10 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) spin_lock_irq(&css_set_lock); cset = task_css_set(current); get_css_set(cset); + if (kargs->cgrp) + kargs->kill_seq = kargs->cgrp->kill_seq; + else + kargs->kill_seq = cset->dfl_cgrp->kill_seq; spin_unlock_irq(&css_set_lock); if (!(kargs->flags & CLONE_INTO_CGROUP)) { @@ -6668,6 +6668,7 @@ void cgroup_post_fork(struct task_struct *child, struct kernel_clone_args *kargs) __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex) { + unsigned int cgrp_kill_seq = 0; unsigned long cgrp_flags = 0; bool kill = false; struct cgroup_subsys *ss; @@ -6681,10 +6682,13 @@ void cgroup_post_fork(struct task_struct *child, /* init tasks are special, only link regular threads */ if (likely(child->pid)) { - if (kargs->cgrp) + if (kargs->cgrp) { cgrp_flags = kargs->cgrp->flags; - else + cgrp_kill_seq = kargs->cgrp->kill_seq; + } else { cgrp_flags = cset->dfl_cgrp->flags; + cgrp_kill_seq = cset->dfl_cgrp->kill_seq; + } WARN_ON_ONCE(!list_empty(&child->cg_list)); cset->nr_tasks++; @@ -6719,7 +6723,7 @@ void cgroup_post_fork(struct task_struct *child, * child down right after we finished preparing it for * userspace. */ - kill = test_bit(CGRP_KILL, &cgrp_flags); + kill = kargs->kill_seq != cgrp_kill_seq; } spin_unlock_irq(&css_set_lock); diff --git a/kernel/cgroup/dmem.c b/kernel/cgroup/dmem.c index fbe34299673d..10b63433f057 100644 --- a/kernel/cgroup/dmem.c +++ b/kernel/cgroup/dmem.c @@ -220,60 +220,32 @@ dmem_cgroup_calculate_protection(struct dmem_cgroup_pool_state *limit_pool, struct dmem_cgroup_pool_state *test_pool) { struct page_counter *climit; - struct cgroup_subsys_state *css, *next_css; + struct cgroup_subsys_state *css; struct dmemcg_state *dmemcg_iter; - struct dmem_cgroup_pool_state *pool, *parent_pool; - bool found_descendant; + struct dmem_cgroup_pool_state *pool, *found_pool; climit = &limit_pool->cnt; rcu_read_lock(); - parent_pool = pool = limit_pool; - css = &limit_pool->cs->css; - /* - * This logic is roughly equivalent to css_foreach_descendant_pre, - * except we also track the parent pool to find out which pool we need - * to calculate protection values for. - * - * We can stop the traversal once we find test_pool among the - * descendants since we don't really care about any others. - */ - while (pool != test_pool) { - next_css = css_next_child(NULL, css); - if (next_css) { - parent_pool = pool; - } else { - while (css != &limit_pool->cs->css) { - next_css = css_next_child(css, css->parent); - if (next_css) - break; - css = css->parent; - parent_pool = pool_parent(parent_pool); - } - /* - * We can only hit this when test_pool is not a - * descendant of limit_pool. - */ - if (WARN_ON_ONCE(css == &limit_pool->cs->css)) - break; - } - css = next_css; - - found_descendant = false; + css_for_each_descendant_pre(css, &limit_pool->cs->css) { dmemcg_iter = container_of(css, struct dmemcg_state, css); + found_pool = NULL; list_for_each_entry_rcu(pool, &dmemcg_iter->pools, css_node) { - if (pool_parent(pool) == parent_pool) { - found_descendant = true; + if (pool->region == limit_pool->region) { + found_pool = pool; break; } } - if (!found_descendant) + if (!found_pool) continue; page_counter_calculate_protection( - climit, &pool->cnt, true); + climit, &found_pool->cnt, true); + + if (found_pool == test_pool) + break; } rcu_read_unlock(); } diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index 5877974ece92..aac91466279f 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -590,7 +590,6 @@ static void root_cgroup_cputime(struct cgroup_base_stat *bstat) cputime->sum_exec_runtime += user; cputime->sum_exec_runtime += sys; - cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL]; #ifdef CONFIG_SCHED_CORE bstat->forceidle_sum += cpustat[CPUTIME_FORCEIDLE]; diff --git a/kernel/events/core.c b/kernel/events/core.c index bcb09e011e9e..6364319e2f88 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4950,7 +4950,7 @@ static struct perf_event_pmu_context * find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx, struct perf_event *event) { - struct perf_event_pmu_context *new = NULL, *epc; + struct perf_event_pmu_context *new = NULL, *pos = NULL, *epc; void *task_ctx_data = NULL; if (!ctx->task) { @@ -5007,12 +5007,19 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx, atomic_inc(&epc->refcount); goto found_epc; } + /* Make sure the pmu_ctx_list is sorted by PMU type: */ + if (!pos && epc->pmu->type > pmu->type) + pos = epc; } epc = new; new = NULL; - list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); + if (!pos) + list_add_tail(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); + else + list_add(&epc->pmu_ctx_entry, pos->pmu_ctx_entry.prev); + epc->ctx = ctx; found_epc: @@ -5962,14 +5969,15 @@ static int _perf_event_period(struct perf_event *event, u64 value) if (!value) return -EINVAL; - if (event->attr.freq && value > sysctl_perf_event_sample_rate) - return -EINVAL; - - if (perf_event_check_period(event, value)) - return -EINVAL; - - if (!event->attr.freq && (value & (1ULL << 63))) - return -EINVAL; + if (event->attr.freq) { + if (value > sysctl_perf_event_sample_rate) + return -EINVAL; + } else { + if (perf_event_check_period(event, value)) + return -EINVAL; + if (value & (1ULL << 63)) + return -EINVAL; + } event_function_call(event, __perf_event_period, &value); @@ -8321,7 +8329,8 @@ void perf_event_exec(void) perf_event_enable_on_exec(ctx); perf_event_remove_on_exec(ctx); - perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true); + scoped_guard(rcu) + perf_iterate_ctx(ctx, perf_event_addr_filters_exec, NULL, true); perf_unpin_context(ctx); put_ctx(ctx); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2ca797cbe465..b4ca8898fe17 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -417,7 +417,7 @@ static void update_ref_ctr_warn(struct uprobe *uprobe, struct mm_struct *mm, short d) { pr_warn("ref_ctr %s failed for inode: 0x%lx offset: " - "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n", + "0x%llx ref_ctr_offset: 0x%llx of mm: 0x%p\n", d > 0 ? "increment" : "decrement", uprobe->inode->i_ino, (unsigned long long) uprobe->offset, (unsigned long long) uprobe->ref_ctr_offset, mm); @@ -495,6 +495,11 @@ retry: if (ret <= 0) goto put_old; + if (is_zero_page(old_page)) { + ret = -EINVAL; + goto put_old; + } + if (WARN(!is_register && PageCompound(old_page), "uprobe unregister should never work on compound page\n")) { ret = -EINVAL; @@ -762,10 +767,14 @@ static struct uprobe *hprobe_expire(struct hprobe *hprobe, bool get) enum hprobe_state hstate; /* - * return_instance's hprobe is protected by RCU. - * Underlying uprobe is itself protected from reuse by SRCU. + * Caller should guarantee that return_instance is not going to be + * freed from under us. This can be achieved either through holding + * rcu_read_lock() or by owning return_instance in the first place. + * + * Underlying uprobe is itself protected from reuse by SRCU, so ensure + * SRCU lock is held properly. */ - lockdep_assert(rcu_read_lock_held() && srcu_read_lock_held(&uretprobes_srcu)); + lockdep_assert(srcu_read_lock_held(&uretprobes_srcu)); hstate = READ_ONCE(hprobe->state); switch (hstate) { diff --git a/kernel/futex/core.c b/kernel/futex/core.c index ebdd76b4ecbb..3db8567f5a44 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -532,7 +532,8 @@ void futex_q_unlock(struct futex_hash_bucket *hb) futex_hb_waiters_dec(hb); } -void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) +void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, + struct task_struct *task) { int prio; @@ -548,7 +549,7 @@ void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) plist_node_init(&q->list, prio); plist_add(&q->list, &hb->chain); - q->task = current; + q->task = task; } /** diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 99b32e728c4a..6b2f4c7eb720 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -285,13 +285,15 @@ static inline int futex_get_value_locked(u32 *dest, u32 __user *from) } extern void __futex_unqueue(struct futex_q *q); -extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb); +extern void __futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, + struct task_struct *task); extern int futex_unqueue(struct futex_q *q); /** * futex_queue() - Enqueue the futex_q on the futex_hash_bucket * @q: The futex_q to enqueue * @hb: The destination hash bucket + * @task: Task queueing this futex * * The hb->lock must be held by the caller, and is released here. A call to * futex_queue() is typically paired with exactly one call to futex_unqueue(). The @@ -299,11 +301,14 @@ extern int futex_unqueue(struct futex_q *q); * or nothing if the unqueue is done as part of the wake process and the unqueue * state is implicit in the state of woken task (see futex_wait_requeue_pi() for * an example). + * + * Note that @task may be NULL, for async usage of futexes. */ -static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb) +static inline void futex_queue(struct futex_q *q, struct futex_hash_bucket *hb, + struct task_struct *task) __releases(&hb->lock) { - __futex_queue(q, hb); + __futex_queue(q, hb, task); spin_unlock(&hb->lock); } diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index daea650b16f5..7a941845f7ee 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -982,7 +982,7 @@ retry_private: /* * Only actually queue now that the atomic ops are done: */ - __futex_queue(&q, hb); + __futex_queue(&q, hb, current); if (trylock) { ret = rt_mutex_futex_trylock(&q.pi_state->pi_mutex); diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index eb86a7ade06a..25877d4f2f8f 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -349,7 +349,7 @@ void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q, * access to the hash list and forcing another memory barrier. */ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); - futex_queue(q, hb); + futex_queue(q, hb, current); /* Arm the timer */ if (timeout) @@ -460,7 +460,7 @@ retry: * next futex. Queue each futex at this moment so hb can * be unlocked. */ - futex_queue(q, hb); + futex_queue(q, hb, current); continue; } diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 5432418c0fea..875f25ed6f71 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -31,10 +31,6 @@ config GENERIC_IRQ_EFFECTIVE_AFF_MASK config GENERIC_PENDING_IRQ bool -# Deduce delayed migration from top-level interrupt chip flags -config GENERIC_PENDING_IRQ_CHIPFLAGS - bool - # Support for generic irq migrating off cpu before the cpu is offline. config GENERIC_IRQ_MIGRATION bool diff --git a/kernel/kthread.c b/kernel/kthread.c index 4005b13ebd7f..5dc5b0d7238e 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -859,7 +859,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) struct kthread *kthread = to_kthread(p); cpumask_var_t affinity; unsigned long flags; - int ret; + int ret = 0; if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) { WARN_ON(1); @@ -892,7 +892,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) out: free_cpumask_var(affinity); - return 0; + return ret; } /* diff --git a/kernel/rseq.c b/kernel/rseq.c index 442aba29bc4c..2cb16091ec0a 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -507,9 +507,6 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, return -EINVAL; if (!access_ok(rseq, rseq_len)) return -EFAULT; - current->rseq = rseq; - current->rseq_len = rseq_len; - current->rseq_sig = sig; #ifdef CONFIG_DEBUG_RSEQ /* * Initialize the in-kernel rseq fields copy for validation of @@ -522,6 +519,14 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, return -EFAULT; #endif /* + * Activate the registration by setting the rseq area address, length + * and signature in the task struct. + */ + current->rseq = rseq; + current->rseq_len = rseq_len; + current->rseq_sig = sig; + + /* * If rseq was previously inactive, and has just been * registered, ensure the cpu_id_start and cpu_id fields * are updated before returning to user-space. diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c index 83d46b9b8ec8..2b331822c7e7 100644 --- a/kernel/sched/autogroup.c +++ b/kernel/sched/autogroup.c @@ -150,7 +150,7 @@ void sched_autogroup_exit_task(struct task_struct *p) * see this thread after that: we can no longer use signal->autogroup. * See the PF_EXITING check in task_wants_autogroup(). */ - sched_move_task(p); + sched_move_task(p, true); } static void @@ -182,7 +182,7 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag) * sched_autogroup_exit_task(). */ for_each_thread(p, t) - sched_move_task(t); + sched_move_task(t, true); unlock_task_sighand(p, &flags); autogroup_kref_put(prev); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 165c90ba64ea..67189907214d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1063,9 +1063,10 @@ void wake_up_q(struct wake_q_head *head) struct task_struct *task; task = container_of(node, struct task_struct, wake_q); - /* Task can safely be re-inserted now: */ node = node->next; - task->wake_q.next = NULL; + /* pairs with cmpxchg_relaxed() in __wake_q_add() */ + WRITE_ONCE(task->wake_q.next, NULL); + /* Task can safely be re-inserted now. */ /* * wake_up_process() executes a full barrier, which pairs with @@ -7284,7 +7285,7 @@ out_unlock: #if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) int __sched __cond_resched(void) { - if (should_resched(0)) { + if (should_resched(0) && !irqs_disabled()) { preempt_schedule_common(); return 1; } @@ -9050,7 +9051,7 @@ static void sched_change_group(struct task_struct *tsk, struct task_group *group * now. This function just updates tsk->se.cfs_rq and tsk->se.parent to reflect * its new group. */ -void sched_move_task(struct task_struct *tsk) +void sched_move_task(struct task_struct *tsk, bool for_autogroup) { int queued, running, queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; @@ -9079,7 +9080,8 @@ void sched_move_task(struct task_struct *tsk) put_prev_task(rq, tsk); sched_change_group(tsk, group); - scx_move_task(tsk); + if (!for_autogroup) + scx_cgroup_move_task(tsk); if (queued) enqueue_task(rq, tsk, queue_flags); @@ -9180,7 +9182,7 @@ static void cpu_cgroup_attach(struct cgroup_taskset *tset) struct cgroup_subsys_state *css; cgroup_taskset_for_each(task, css, tset) - sched_move_task(task); + sched_move_task(task, false); scx_cgroup_finish_attach(); } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index fd7e85220715..ef047add7f9e 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -1262,6 +1262,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, if (task_has_dl_policy(p)) { P(dl.runtime); P(dl.deadline); + } else if (fair_policy(p->policy)) { + P(se.slice); } #ifdef CONFIG_SCHED_CLASS_EXT __PS("ext.enabled", task_on_scx(p)); diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 8857c0709bdd..0f1da199cfc7 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -123,6 +123,19 @@ enum scx_ops_flags { SCX_OPS_SWITCH_PARTIAL = 1LLU << 3, /* + * A migration disabled task can only execute on its current CPU. By + * default, such tasks are automatically put on the CPU's local DSQ with + * the default slice on enqueue. If this ops flag is set, they also go + * through ops.enqueue(). + * + * A migration disabled task never invokes ops.select_cpu() as it can + * only select the current CPU. Also, p->cpus_ptr will only contain its + * current CPU while p->nr_cpus_allowed keeps tracking p->user_cpus_ptr + * and thus may disagree with cpumask_weight(p->cpus_ptr). + */ + SCX_OPS_ENQ_MIGRATION_DISABLED = 1LLU << 4, + + /* * CPU cgroup support flags */ SCX_OPS_HAS_CGROUP_WEIGHT = 1LLU << 16, /* cpu.weight */ @@ -130,6 +143,7 @@ enum scx_ops_flags { SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE | SCX_OPS_ENQ_LAST | SCX_OPS_ENQ_EXITING | + SCX_OPS_ENQ_MIGRATION_DISABLED | SCX_OPS_SWITCH_PARTIAL | SCX_OPS_HAS_CGROUP_WEIGHT, }; @@ -416,7 +430,7 @@ struct sched_ext_ops { /** * @update_idle: Update the idle state of a CPU - * @cpu: CPU to udpate the idle state for + * @cpu: CPU to update the idle state for * @idle: whether entering or exiting the idle state * * This operation is called when @rq's CPU goes or leaves the idle @@ -882,6 +896,7 @@ static bool scx_warned_zero_slice; static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_last); static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_exiting); +static DEFINE_STATIC_KEY_FALSE(scx_ops_enq_migration_disabled); static DEFINE_STATIC_KEY_FALSE(scx_ops_cpu_preempt); static DEFINE_STATIC_KEY_FALSE(scx_builtin_idle_enabled); @@ -1214,7 +1229,7 @@ static bool scx_kf_allowed_if_unlocked(void) /** * nldsq_next_task - Iterate to the next task in a non-local DSQ - * @dsq: user dsq being interated + * @dsq: user dsq being iterated * @cur: current position, %NULL to start iteration * @rev: walk backwards * @@ -2014,6 +2029,11 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags, unlikely(p->flags & PF_EXITING)) goto local; + /* see %SCX_OPS_ENQ_MIGRATION_DISABLED */ + if (!static_branch_unlikely(&scx_ops_enq_migration_disabled) && + is_migration_disabled(p)) + goto local; + if (!SCX_HAS_OP(enqueue)) goto global; @@ -2078,7 +2098,7 @@ static void set_task_runnable(struct rq *rq, struct task_struct *p) /* * list_add_tail() must be used. scx_ops_bypass() depends on tasks being - * appened to the runnable_list. + * appended to the runnable_list. */ list_add_tail(&p->scx.runnable_node, &rq->scx.runnable_list); } @@ -2313,12 +2333,35 @@ static void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags, * * - The BPF scheduler is bypassed while the rq is offline and we can always say * no to the BPF scheduler initiated migrations while offline. + * + * The caller must ensure that @p and @rq are on different CPUs. */ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, bool trigger_error) { int cpu = cpu_of(rq); + SCHED_WARN_ON(task_cpu(p) == cpu); + + /* + * If @p has migration disabled, @p->cpus_ptr is updated to contain only + * the pinned CPU in migrate_disable_switch() while @p is being switched + * out. However, put_prev_task_scx() is called before @p->cpus_ptr is + * updated and thus another CPU may see @p on a DSQ inbetween leading to + * @p passing the below task_allowed_on_cpu() check while migration is + * disabled. + * + * Test the migration disabled state first as the race window is narrow + * and the BPF scheduler failing to check migration disabled state can + * easily be masked if task_allowed_on_cpu() is done first. + */ + if (unlikely(is_migration_disabled(p))) { + if (trigger_error) + scx_ops_error("SCX_DSQ_LOCAL[_ON] cannot move migration disabled %s[%d] from CPU %d to %d", + p->comm, p->pid, task_cpu(p), cpu); + return false; + } + /* * We don't require the BPF scheduler to avoid dispatching to offline * CPUs mostly for convenience but also because CPUs can go offline @@ -2327,14 +2370,11 @@ static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, */ if (!task_allowed_on_cpu(p, cpu)) { if (trigger_error) - scx_ops_error("SCX_DSQ_LOCAL[_ON] verdict target cpu %d not allowed for %s[%d]", - cpu_of(rq), p->comm, p->pid); + scx_ops_error("SCX_DSQ_LOCAL[_ON] target CPU %d not allowed for %s[%d]", + cpu, p->comm, p->pid); return false; } - if (unlikely(is_migration_disabled(p))) - return false; - if (!scx_rq_online(rq)) return false; @@ -2437,7 +2477,8 @@ static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags, if (dst_dsq->id == SCX_DSQ_LOCAL) { dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq); - if (!task_can_run_on_remote_rq(p, dst_rq, true)) { + if (src_rq != dst_rq && + unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) { dst_dsq = find_global_dsq(p); dst_rq = src_rq; } @@ -2480,7 +2521,7 @@ static struct rq *move_task_between_dsqs(struct task_struct *p, u64 enq_flags, /* * A poorly behaving BPF scheduler can live-lock the system by e.g. incessantly * banging on the same DSQ on a large NUMA system to the point where switching - * to the bypass mode can take a long time. Inject artifical delays while the + * to the bypass mode can take a long time. Inject artificial delays while the * bypass mode is switching to guarantee timely completion. */ static void scx_ops_breather(struct rq *rq) @@ -2575,6 +2616,9 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, { struct rq *src_rq = task_rq(p); struct rq *dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq); +#ifdef CONFIG_SMP + struct rq *locked_rq = rq; +#endif /* * We're synchronized against dequeue through DISPATCHING. As @p can't @@ -2588,7 +2632,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, } #ifdef CONFIG_SMP - if (unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) { + if (src_rq != dst_rq && + unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) { dispatch_enqueue(find_global_dsq(p), p, enq_flags | SCX_ENQ_CLEAR_OPSS); return; @@ -2611,8 +2656,9 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE); /* switch to @src_rq lock */ - if (rq != src_rq) { - raw_spin_rq_unlock(rq); + if (locked_rq != src_rq) { + raw_spin_rq_unlock(locked_rq); + locked_rq = src_rq; raw_spin_rq_lock(src_rq); } @@ -2630,6 +2676,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, } else { move_remote_task_to_local_dsq(p, enq_flags, src_rq, dst_rq); + /* task has been moved to dst_rq, which is now locked */ + locked_rq = dst_rq; } /* if the destination CPU is idle, wake it up */ @@ -2638,8 +2686,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq, } /* switch back to @rq lock */ - if (rq != dst_rq) { - raw_spin_rq_unlock(dst_rq); + if (locked_rq != rq) { + raw_spin_rq_unlock(locked_rq); raw_spin_rq_lock(rq); } #else /* CONFIG_SMP */ @@ -3069,7 +3117,6 @@ static struct task_struct *pick_task_scx(struct rq *rq) { struct task_struct *prev = rq->curr; struct task_struct *p; - bool prev_on_scx = prev->sched_class == &ext_sched_class; bool keep_prev = rq->scx.flags & SCX_RQ_BAL_KEEP; bool kick_idle = false; @@ -3089,14 +3136,18 @@ static struct task_struct *pick_task_scx(struct rq *rq) * if pick_task_scx() is called without preceding balance_scx(). */ if (unlikely(rq->scx.flags & SCX_RQ_BAL_PENDING)) { - if (prev_on_scx) { + if (prev->scx.flags & SCX_TASK_QUEUED) { keep_prev = true; } else { keep_prev = false; kick_idle = true; } - } else if (unlikely(keep_prev && !prev_on_scx)) { - /* only allowed during transitions */ + } else if (unlikely(keep_prev && + prev->sched_class != &ext_sched_class)) { + /* + * Can happen while enabling as SCX_RQ_BAL_PENDING assertion is + * conditional on scx_enabled() and may have been skipped. + */ WARN_ON_ONCE(scx_ops_enable_state() == SCX_OPS_ENABLED); keep_prev = false; } @@ -3144,7 +3195,7 @@ static struct task_struct *pick_task_scx(struct rq *rq) * * Unless overridden by ops.core_sched_before(), @p->scx.core_sched_at is used * to implement the default task ordering. The older the timestamp, the higher - * prority the task - the global FIFO ordering matching the default scheduling + * priority the task - the global FIFO ordering matching the default scheduling * behavior. * * When ops.core_sched_before() is enabled, @p->scx.core_sched_at is used to @@ -3851,7 +3902,7 @@ static void task_tick_scx(struct rq *rq, struct task_struct *curr, int queued) curr->scx.slice = 0; touch_core_sched(rq, curr); } else if (SCX_HAS_OP(tick)) { - SCX_CALL_OP(SCX_KF_REST, tick, curr); + SCX_CALL_OP_TASK(SCX_KF_REST, tick, curr); } if (!curr->scx.slice) @@ -3998,7 +4049,7 @@ static void scx_ops_disable_task(struct task_struct *p) WARN_ON_ONCE(scx_get_task_state(p) != SCX_TASK_ENABLED); if (SCX_HAS_OP(disable)) - SCX_CALL_OP(SCX_KF_REST, disable, p); + SCX_CALL_OP_TASK(SCX_KF_REST, disable, p); scx_set_task_state(p, SCX_TASK_READY); } @@ -4027,7 +4078,7 @@ static void scx_ops_exit_task(struct task_struct *p) } if (SCX_HAS_OP(exit_task)) - SCX_CALL_OP(SCX_KF_REST, exit_task, p, &args); + SCX_CALL_OP_TASK(SCX_KF_REST, exit_task, p, &args); scx_set_task_state(p, SCX_TASK_NONE); } @@ -4323,25 +4374,12 @@ err: return ops_sanitize_err("cgroup_prep_move", ret); } -void scx_move_task(struct task_struct *p) +void scx_cgroup_move_task(struct task_struct *p) { if (!scx_cgroup_enabled) return; /* - * We're called from sched_move_task() which handles both cgroup and - * autogroup moves. Ignore the latter. - * - * Also ignore exiting tasks, because in the exit path tasks transition - * from the autogroup to the root group, so task_group_is_autogroup() - * alone isn't able to catch exiting autogroup tasks. This is safe for - * cgroup_move(), because cgroup migrations never happen for PF_EXITING - * tasks. - */ - if (task_group_is_autogroup(task_group(p)) || (p->flags & PF_EXITING)) - return; - - /* * @p must have ops.cgroup_prep_move() called on it and thus * cgrp_moving_from set. */ @@ -4590,7 +4628,7 @@ static int scx_cgroup_init(void) cgroup_warned_missing_idle = false; /* - * scx_tg_on/offline() are excluded thorugh scx_cgroup_rwsem. If we walk + * scx_tg_on/offline() are excluded through scx_cgroup_rwsem. If we walk * cgroups and init, all online cgroups are initialized. */ rcu_read_lock(); @@ -5059,6 +5097,7 @@ static void scx_ops_disable_workfn(struct kthread_work *work) static_branch_disable(&scx_has_op[i]); static_branch_disable(&scx_ops_enq_last); static_branch_disable(&scx_ops_enq_exiting); + static_branch_disable(&scx_ops_enq_migration_disabled); static_branch_disable(&scx_ops_cpu_preempt); static_branch_disable(&scx_builtin_idle_enabled); synchronize_rcu(); @@ -5277,9 +5316,10 @@ static void scx_dump_task(struct seq_buf *s, struct scx_dump_ctx *dctx, scx_get_task_state(p), p->scx.flags & ~SCX_TASK_STATE_MASK, p->scx.dsq_flags, ops_state & SCX_OPSS_STATE_MASK, ops_state >> SCX_OPSS_QSEQ_SHIFT); - dump_line(s, " sticky/holding_cpu=%d/%d dsq_id=%s dsq_vtime=%llu slice=%llu", - p->scx.sticky_cpu, p->scx.holding_cpu, dsq_id_buf, - p->scx.dsq_vtime, p->scx.slice); + dump_line(s, " sticky/holding_cpu=%d/%d dsq_id=%s", + p->scx.sticky_cpu, p->scx.holding_cpu, dsq_id_buf); + dump_line(s, " dsq_vtime=%llu slice=%llu weight=%u", + p->scx.dsq_vtime, p->scx.slice, p->scx.weight); dump_line(s, " cpus=%*pb", cpumask_pr_args(p->cpus_ptr)); if (SCX_HAS_OP(dump_task)) { @@ -5667,6 +5707,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link) if (ops->flags & SCX_OPS_ENQ_EXITING) static_branch_enable(&scx_ops_enq_exiting); + if (ops->flags & SCX_OPS_ENQ_MIGRATION_DISABLED) + static_branch_enable(&scx_ops_enq_migration_disabled); if (scx_ops.cpu_acquire || scx_ops.cpu_release) static_branch_enable(&scx_ops_cpu_preempt); diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 4d022d17ac7d..1079b56b0f7a 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -73,7 +73,7 @@ static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify) {} int scx_tg_online(struct task_group *tg); void scx_tg_offline(struct task_group *tg); int scx_cgroup_can_attach(struct cgroup_taskset *tset); -void scx_move_task(struct task_struct *p); +void scx_cgroup_move_task(struct task_struct *p); void scx_cgroup_finish_attach(void); void scx_cgroup_cancel_attach(struct cgroup_taskset *tset); void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight); @@ -82,7 +82,7 @@ void scx_group_set_idle(struct task_group *tg, bool idle); static inline int scx_tg_online(struct task_group *tg) { return 0; } static inline void scx_tg_offline(struct task_group *tg) {} static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; } -static inline void scx_move_task(struct task_struct *p) {} +static inline void scx_cgroup_move_task(struct task_struct *p) {} static inline void scx_cgroup_finish_attach(void) {} static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {} static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {} diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index ce2e94ccad0c..1c0ef435a7aa 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5385,6 +5385,15 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq); static void set_delayed(struct sched_entity *se) { se->sched_delayed = 1; + + /* + * Delayed se of cfs_rq have no tasks queued on them. + * Do not adjust h_nr_runnable since dequeue_entities() + * will account it for blocked tasks. + */ + if (!entity_is_task(se)) + return; + for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); @@ -5397,6 +5406,16 @@ static void set_delayed(struct sched_entity *se) static void clear_delayed(struct sched_entity *se) { se->sched_delayed = 0; + + /* + * Delayed se of cfs_rq have no tasks queued on them. + * Do not adjust h_nr_runnable since a dequeue has + * already accounted for it or an enqueue of a task + * below it will account for it in enqueue_task_fair(). + */ + if (!entity_is_task(se)) + return; + for_each_sched_entity(se) { struct cfs_rq *cfs_rq = cfs_rq_of(se); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 38e0e323dda2..c8512a9fb022 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -572,7 +572,7 @@ extern void sched_online_group(struct task_group *tg, extern void sched_destroy_group(struct task_group *tg); extern void sched_release_group(struct task_group *tg); -extern void sched_move_task(struct task_struct *tsk); +extern void sched_move_task(struct task_struct *tsk, bool for_autogroup); #ifdef CONFIG_FAIR_GROUP_SCHED extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); @@ -3698,10 +3698,28 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm) { struct cpumask *cidmask = mm_cidmask(mm); struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid; - int cid = __this_cpu_read(pcpu_cid->recent_cid); + int cid, max_nr_cid, allowed_max_nr_cid; + /* + * After shrinking the number of threads or reducing the number + * of allowed cpus, reduce the value of max_nr_cid so expansion + * of cid allocation will preserve cache locality if the number + * of threads or allowed cpus increase again. + */ + max_nr_cid = atomic_read(&mm->max_nr_cid); + while ((allowed_max_nr_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed), + atomic_read(&mm->mm_users))), + max_nr_cid > allowed_max_nr_cid) { + /* atomic_try_cmpxchg loads previous mm->max_nr_cid into max_nr_cid. */ + if (atomic_try_cmpxchg(&mm->max_nr_cid, &max_nr_cid, allowed_max_nr_cid)) { + max_nr_cid = allowed_max_nr_cid; + break; + } + } /* Try to re-use recent cid. This improves cache locality. */ - if (!mm_cid_is_unset(cid) && !cpumask_test_and_set_cpu(cid, cidmask)) + cid = __this_cpu_read(pcpu_cid->recent_cid); + if (!mm_cid_is_unset(cid) && cid < max_nr_cid && + !cpumask_test_and_set_cpu(cid, cidmask)) return cid; /* * Expand cid allocation if the maximum number of concurrency @@ -3709,8 +3727,9 @@ static inline int __mm_cid_try_get(struct task_struct *t, struct mm_struct *mm) * and number of threads. Expanding cid allocation as much as * possible improves cache locality. */ - cid = atomic_read(&mm->max_nr_cid); + cid = max_nr_cid; while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) { + /* atomic_try_cmpxchg loads previous mm->max_nr_cid into cid. */ if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1)) continue; if (!cpumask_test_and_set_cpu(cid, cidmask)) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f59381c4a2ff..7bbb408431eb 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -749,6 +749,15 @@ static bool seccomp_is_const_allow(struct sock_fprog_kern *fprog, if (WARN_ON_ONCE(!fprog)) return false; + /* Our single exception to filtering. */ +#ifdef __NR_uretprobe +#ifdef SECCOMP_ARCH_COMPAT + if (sd->arch == SECCOMP_ARCH_NATIVE) +#endif + if (sd->nr == __NR_uretprobe) + return true; +#endif + for (pc = 0; pc < fprog->len; pc++) { struct sock_filter *insn = &fprog->filter[pc]; u16 code = insn->code; @@ -1023,6 +1032,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action, */ static const int mode1_syscalls[] = { __NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn, +#ifdef __NR_uretprobe + __NR_uretprobe, +#endif -1, /* negative terminated */ }; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7304d7cf47f2..2a7802ec480c 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -373,16 +373,18 @@ void clocksource_verify_percpu(struct clocksource *cs) cpumask_clear(&cpus_ahead); cpumask_clear(&cpus_behind); cpus_read_lock(); - preempt_disable(); + migrate_disable(); clocksource_verify_choose_cpus(); if (cpumask_empty(&cpus_chosen)) { - preempt_enable(); + migrate_enable(); cpus_read_unlock(); pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name); return; } testcpu = smp_processor_id(); - pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); + pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", + cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); + preempt_disable(); for_each_cpu(cpu, &cpus_chosen) { if (cpu == testcpu) continue; @@ -402,6 +404,7 @@ void clocksource_verify_percpu(struct clocksource *cs) cs_nsec_min = cs_nsec; } preempt_enable(); + migrate_enable(); cpus_read_unlock(); if (!cpumask_empty(&cpus_ahead)) pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n", diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f6d8df94045c..deb1aa32814e 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -58,6 +58,8 @@ #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) +static void retrigger_next_event(void *arg); + /* * The timer bases: * @@ -111,7 +113,8 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, - } + }, + .csd = CSD_INIT(retrigger_next_event, NULL) }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { @@ -124,6 +127,14 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_TAI] = HRTIMER_BASE_TAI, }; +static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base) +{ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return true; + else + return likely(base->online); +} + /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -145,11 +156,6 @@ static struct hrtimer_cpu_base migration_cpu_base = { #define migration_base migration_cpu_base.clock_base[0] -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return base == &migration_base; -} - /* * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock * means that all timers which are tied to this base via timer->base are @@ -183,27 +189,54 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } /* - * We do not migrate the timer when it is expiring before the next - * event on the target cpu. When high resolution is enabled, we cannot - * reprogram the target cpu hardware and we would cause it to fire - * late. To keep it simple, we handle the high resolution enabled and - * disabled case similar. + * Check if the elected target is suitable considering its next + * event and the hotplug state of the current CPU. + * + * If the elected target is remote and its next event is after the timer + * to queue, then a remote reprogram is necessary. However there is no + * guarantee the IPI handling the operation would arrive in time to meet + * the high resolution deadline. In this case the local CPU becomes a + * preferred target, unless it is offline. + * + * High and low resolution modes are handled the same way for simplicity. * * Called with cpu_base->lock of target cpu held. */ -static int -hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) +static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base, + struct hrtimer_cpu_base *new_cpu_base, + struct hrtimer_cpu_base *this_cpu_base) { ktime_t expires; + /* + * The local CPU clockevent can be reprogrammed. Also get_target_base() + * guarantees it is online. + */ + if (new_cpu_base == this_cpu_base) + return true; + + /* + * The offline local CPU can't be the default target if the + * next remote target event is after this timer. Keep the + * elected new base. An IPI will we issued to reprogram + * it as a last resort. + */ + if (!hrtimer_base_is_online(this_cpu_base)) + return true; + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); - return expires < new_base->cpu_base->expires_next; + + return expires >= new_base->cpu_base->expires_next; } -static inline -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, - int pinned) +static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { + if (!hrtimer_base_is_online(base)) { + int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); + + return &per_cpu(hrtimer_bases, cpu); + } + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) if (static_branch_likely(&timers_migration_enabled) && !pinned) return &per_cpu(hrtimer_bases, get_nohz_timer_target()); @@ -254,8 +287,8 @@ again: raw_spin_unlock(&base->cpu_base->lock); raw_spin_lock(&new_base->cpu_base->lock); - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, + this_cpu_base)) { raw_spin_unlock(&new_base->cpu_base->lock); raw_spin_lock(&base->cpu_base->lock); new_cpu_base = this_cpu_base; @@ -264,8 +297,7 @@ again: } WRITE_ONCE(timer->base, new_base); } else { - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { new_cpu_base = this_cpu_base; goto again; } @@ -275,11 +307,6 @@ again: #else /* CONFIG_SMP */ -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return false; -} - static inline struct hrtimer_clock_base * lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __acquires(&timer->base->cpu_base->lock) @@ -716,8 +743,6 @@ static inline int hrtimer_is_hres_enabled(void) return hrtimer_hres_enabled; } -static void retrigger_next_event(void *arg); - /* * Switch to high resolution mode */ @@ -1205,6 +1230,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) { + struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *new_base; bool force_local, first; @@ -1216,10 +1242,16 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * and enforce reprogramming after it is queued no matter whether * it is the new first expiring timer again or not. */ - force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases); + force_local = base->cpu_base == this_cpu_base; force_local &= base->cpu_base->next_timer == timer; /* + * Don't force local queuing if this enqueue happens on a unplugged + * CPU after hrtimer_cpu_dying() has been invoked. + */ + force_local &= this_cpu_base->online; + + /* * Remove an active timer from the queue. In case it is not queued * on the current CPU, make sure that remove_hrtimer() updates the * remote data correctly. @@ -1248,8 +1280,27 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, } first = enqueue_hrtimer(timer, new_base, mode); - if (!force_local) - return first; + if (!force_local) { + /* + * If the current CPU base is online, then the timer is + * never queued on a remote CPU if it would be the first + * expiring timer there. + */ + if (hrtimer_base_is_online(this_cpu_base)) + return first; + + /* + * Timer was enqueued remote because the current base is + * already offline. If the timer is the first to expire, + * kick the remote CPU to reprogram the clock event. + */ + if (first) { + struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base; + + smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd); + } + return 0; + } /* * Timer was forced to stay on the current CPU to avoid @@ -1370,6 +1421,18 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, } } +#ifdef CONFIG_SMP +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return base == &migration_base; +} +#else +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return false; +} +#endif + /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was diff --git a/kernel/time/timer_migration.c b/kernel/time/timer_migration.c index 9cb9b6584ea1..2f6330831f08 100644 --- a/kernel/time/timer_migration.c +++ b/kernel/time/timer_migration.c @@ -1675,6 +1675,9 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) } while (i < tmigr_hierarchy_levels); + /* Assert single root */ + WARN_ON_ONCE(!err && !group->parent && !list_is_singular(&tmigr_level_list[top])); + while (i > 0) { group = stack[--i]; @@ -1716,7 +1719,12 @@ static int tmigr_setup_groups(unsigned int cpu, unsigned int node) WARN_ON_ONCE(top == 0); lvllist = &tmigr_level_list[top]; - if (group->num_children == 1 && list_is_singular(lvllist)) { + + /* + * Newly created root level should have accounted the upcoming + * CPU's child group and pre-accounted the old root. + */ + if (group->num_children == 2 && list_is_singular(lvllist)) { /* * The target CPU must never do the prepare work, except * on early boot when the boot CPU is the target. Otherwise diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 2560b312ad57..33082c4e8154 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -403,13 +403,12 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num) lockdep_assert_held(&fprobe_mutex); fprobe_graph_active--; - if (!fprobe_graph_active) { - /* Q: should we unregister it ? */ + /* Q: should we unregister it ? */ + if (!fprobe_graph_active) unregister_ftrace_graph(&fprobe_graph_ops); - return; - } - ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); + if (num) + ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); } static int symbols_cmp(const void *a, const void *b) @@ -679,8 +678,7 @@ int unregister_fprobe(struct fprobe *fp) } del_fprobe_hash(fp); - if (count) - fprobe_graph_remove_ips(addrs, count); + fprobe_graph_remove_ips(addrs, count); kfree_rcu(hlist_array, rcu); fp->hlist_array = NULL; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 728ecda6e8d4..fc88e0688daf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -540,6 +540,7 @@ static int function_stat_show(struct seq_file *m, void *v) static struct trace_seq s; unsigned long long avg; unsigned long long stddev; + unsigned long long stddev_denom; #endif guard(mutex)(&ftrace_profile_lock); @@ -559,23 +560,19 @@ static int function_stat_show(struct seq_file *m, void *v) #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_puts(m, " "); - /* Sample standard deviation (s^2) */ - if (rec->counter <= 1) - stddev = 0; - else { - /* - * Apply Welford's method: - * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) - */ + /* + * Variance formula: + * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) + * Maybe Welford's method is better here? + * Divide only by 1000 for ns^2 -> us^2 conversion. + * trace_print_graph_duration will divide by 1000 again. + */ + stddev = 0; + stddev_denom = rec->counter * (rec->counter - 1) * 1000; + if (stddev_denom) { stddev = rec->counter * rec->time_squared - rec->time * rec->time; - - /* - * Divide only 1000 for ns^2 -> us^2 conversion. - * trace_print_graph_duration will divide 1000 again. - */ - stddev = div64_ul(stddev, - rec->counter * (rec->counter - 1) * 1000); + stddev = div64_ul(stddev, stddev_denom); } trace_seq_init(&s); @@ -3220,15 +3217,22 @@ static struct ftrace_hash *copy_hash(struct ftrace_hash *src) * The filter_hash updates uses just the append_hash() function * and the notrace_hash does not. */ -static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash) +static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash, + int size_bits) { struct ftrace_func_entry *entry; int size; int i; - /* An empty hash does everything */ - if (ftrace_hash_empty(*hash)) - return 0; + if (*hash) { + /* An empty hash does everything */ + if (ftrace_hash_empty(*hash)) + return 0; + } else { + *hash = alloc_ftrace_hash(size_bits); + if (!*hash) + return -ENOMEM; + } /* If new_hash has everything make hash have everything */ if (ftrace_hash_empty(new_hash)) { @@ -3292,16 +3296,18 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has /* Return a new hash that has a union of all @ops->filter_hash entries */ static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) { - struct ftrace_hash *new_hash; + struct ftrace_hash *new_hash = NULL; struct ftrace_ops *subops; + int size_bits; int ret; - new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits); - if (!new_hash) - return NULL; + if (ops->func_hash->filter_hash) + size_bits = ops->func_hash->filter_hash->size_bits; + else + size_bits = FTRACE_HASH_DEFAULT_BITS; list_for_each_entry(subops, &ops->subop_list, list) { - ret = append_hash(&new_hash, subops->func_hash->filter_hash); + ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits); if (ret < 0) { free_ftrace_hash(new_hash); return NULL; @@ -3310,7 +3316,8 @@ static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) if (ftrace_hash_empty(new_hash)) break; } - return new_hash; + /* Can't return NULL as that means this failed */ + return new_hash ? : EMPTY_HASH; } /* Make @ops trace evenything except what all its subops do not trace */ @@ -3505,7 +3512,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash); if (!filter_hash) return -ENOMEM; - ret = append_hash(&filter_hash, subops->func_hash->filter_hash); + ret = append_hash(&filter_hash, subops->func_hash->filter_hash, + size_bits); if (ret < 0) { free_ftrace_hash(filter_hash); return ret; @@ -5707,6 +5715,9 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) return -ENOENT; free_hash_entry(hash, entry); return 0; + } else if (__ftrace_lookup_ip(hash, ip) != NULL) { + /* Already exists */ + return 0; } entry = add_hash_entry(hash, ip); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index b8e0ae15ca5b..bb6089c2951e 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1672,7 +1672,8 @@ static void *rb_range_buffer(struct ring_buffer_per_cpu *cpu_buffer, int idx) * must be the same. */ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, - struct trace_buffer *buffer, int nr_pages) + struct trace_buffer *buffer, int nr_pages, + unsigned long *subbuf_mask) { int subbuf_size = PAGE_SIZE; struct buffer_data_page *subbuf; @@ -1680,6 +1681,9 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, unsigned long buffers_end; int i; + if (!subbuf_mask) + return false; + /* Check the meta magic and meta struct size */ if (meta->magic != RING_BUFFER_META_MAGIC || meta->struct_size != sizeof(*meta)) { @@ -1712,6 +1716,8 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, subbuf = rb_subbufs_from_meta(meta); + bitmap_clear(subbuf_mask, 0, meta->nr_subbufs); + /* Is the meta buffers and the subbufs themselves have correct data? */ for (i = 0; i < meta->nr_subbufs; i++) { if (meta->buffers[i] < 0 || @@ -1725,6 +1731,12 @@ static bool rb_meta_valid(struct ring_buffer_meta *meta, int cpu, return false; } + if (test_bit(meta->buffers[i], subbuf_mask)) { + pr_info("Ring buffer boot meta [%d] array has duplicates\n", cpu); + return false; + } + + set_bit(meta->buffers[i], subbuf_mask); subbuf = (void *)subbuf + subbuf_size; } @@ -1838,6 +1850,11 @@ static void rb_meta_validate_events(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->cpu); goto invalid; } + + /* If the buffer has content, update pages_touched */ + if (ret) + local_inc(&cpu_buffer->pages_touched); + entries += ret; entry_bytes += local_read(&head_page->page->commit); local_set(&cpu_buffer->head_page->entries, ret); @@ -1889,17 +1906,22 @@ static void rb_meta_init_text_addr(struct ring_buffer_meta *meta) static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) { struct ring_buffer_meta *meta; + unsigned long *subbuf_mask; unsigned long delta; void *subbuf; int cpu; int i; + /* Create a mask to test the subbuf array */ + subbuf_mask = bitmap_alloc(nr_pages + 1, GFP_KERNEL); + /* If subbuf_mask fails to allocate, then rb_meta_valid() will return false */ + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { void *next_meta; meta = rb_range_meta(buffer, nr_pages, cpu); - if (rb_meta_valid(meta, cpu, buffer, nr_pages)) { + if (rb_meta_valid(meta, cpu, buffer, nr_pages, subbuf_mask)) { /* Make the mappings match the current address */ subbuf = rb_subbufs_from_meta(meta); delta = (unsigned long)subbuf - meta->first_buffer; @@ -1943,6 +1965,7 @@ static void rb_range_meta_init(struct trace_buffer *buffer, int nr_pages) subbuf += meta->subbuf_size; } } + bitmap_free(subbuf_mask); } static void *rbm_start(struct seq_file *m, loff_t *pos) @@ -7126,6 +7149,7 @@ int ring_buffer_map(struct trace_buffer *buffer, int cpu, kfree(cpu_buffer->subbuf_ids); cpu_buffer->subbuf_ids = NULL; rb_free_meta_page(cpu_buffer); + atomic_dec(&cpu_buffer->resize_disabled); } unlock: diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 1496a5ac33ae..0e6d517e74e0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5977,8 +5977,6 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr, ssize_t tracing_resize_ring_buffer(struct trace_array *tr, unsigned long size, int cpu_id) { - int ret; - guard(mutex)(&trace_types_lock); if (cpu_id != RING_BUFFER_ALL_CPUS) { @@ -5987,11 +5985,7 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr, return -EINVAL; } - ret = __tracing_resize_ring_buffer(tr, size, cpu_id); - if (ret < 0) - ret = -ENOMEM; - - return ret; + return __tracing_resize_ring_buffer(tr, size, cpu_id); } static void update_last_data(struct trace_array *tr) @@ -8285,6 +8279,10 @@ static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma) struct trace_iterator *iter = &info->iter; int ret = 0; + /* Currently the boot mapped buffer is not supported for mmap */ + if (iter->tr->flags & TRACE_ARRAY_FL_BOOT) + return -ENODEV; + ret = get_snapshot_map(iter->tr); if (ret) return ret; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4cb275316e51..513de9ceb80e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1591,6 +1591,13 @@ s_next(struct seq_file *m, void *v, loff_t *pos) return iter; #endif + /* + * The iter is allocated in s_start() and passed via the 'v' + * parameter. To stop the iterator, NULL must be returned. But + * the return value is what the 'v' parameter in s_stop() receives + * and frees. Free iter here as it will no longer be used. + */ + kfree(iter); return NULL; } @@ -1667,9 +1674,9 @@ static int s_show(struct seq_file *m, void *v) } #endif -static void s_stop(struct seq_file *m, void *p) +static void s_stop(struct seq_file *m, void *v) { - kfree(p); + kfree(v); t_stop(m, NULL); } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 261163b00137..ad7419e24055 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6724,27 +6724,27 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, if (existing_hist_update_only(glob, trigger_data, file)) goto out_free; - ret = event_trigger_register(cmd_ops, file, glob, trigger_data); - if (ret < 0) - goto out_free; + if (!get_named_trigger_data(trigger_data)) { - if (get_named_trigger_data(trigger_data)) - goto enable; + ret = create_actions(hist_data); + if (ret) + goto out_free; - ret = create_actions(hist_data); - if (ret) - goto out_unreg; + if (has_hist_vars(hist_data) || hist_data->n_var_refs) { + ret = save_hist_vars(hist_data); + if (ret) + goto out_free; + } - if (has_hist_vars(hist_data) || hist_data->n_var_refs) { - ret = save_hist_vars(hist_data); + ret = tracing_map_init(hist_data->map); if (ret) - goto out_unreg; + goto out_free; } - ret = tracing_map_init(hist_data->map); - if (ret) - goto out_unreg; -enable: + ret = event_trigger_register(cmd_ops, file, glob, trigger_data); + if (ret < 0) + goto out_free; + ret = hist_trigger_enable(trigger_data, file); if (ret) goto out_unreg; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index d358c9935164..df56f9b76010 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -216,7 +216,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, parent_ip = function_get_true_parent_ip(parent_ip, fregs); - trace_ctx = tracing_gen_ctx(); + trace_ctx = tracing_gen_ctx_dec(); data = this_cpu_ptr(tr->array_buffer.data); if (!atomic_read(&data->disabled)) @@ -321,7 +321,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = op->private; struct trace_array_cpu *data; unsigned int trace_ctx; - unsigned long flags; int bit; if (unlikely(!tr->function_enabled)) @@ -347,8 +346,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, if (is_repeat_check(tr, last_info, ip, parent_ip)) goto out; - local_save_flags(flags); - trace_ctx = tracing_gen_ctx_flags(flags); + trace_ctx = tracing_gen_ctx_dec(); process_repeats(tr, ip, parent_ip, last_info, trace_ctx); trace_function(tr, ip, parent_ip, trace_ctx); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 54d850997c0a..136c750b0b4d 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -198,7 +198,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, * returning from the function. */ if (ftrace_graph_notrace_addr(trace->func)) { - *task_var |= TRACE_GRAPH_NOTRACE_BIT; + *task_var |= TRACE_GRAPH_NOTRACE; /* * Need to return 1 to have the return called * that will clear the NOTRACE bit. diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 8800f5acc007..2ef2e1b80091 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -133,7 +133,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL); if (!vtsk) - return NULL; + return ERR_PTR(-ENOMEM); init_completion(&vtsk->exited); mutex_init(&vtsk->exit_mutex); vtsk->data = arg; @@ -145,7 +145,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args); if (IS_ERR(tsk)) { kfree(vtsk); - return NULL; + return ERR_PTR(PTR_ERR(tsk)); } vtsk->task = tsk; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3c2c45313c88..bfe030b443e2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2254,8 +2254,10 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, * queues a new work item to a wq after destroy_workqueue(wq). */ if (unlikely(wq->flags & (__WQ_DESTROYING | __WQ_DRAINING) && - WARN_ON_ONCE(!is_chained_work(wq)))) + WARN_ONCE(!is_chained_work(wq), "workqueue: cannot queue %ps on wq %s\n", + work->func, wq->name))) { return; + } rcu_read_lock(); retry: /* pwq which will be used unless @work is executing elsewhere */ @@ -3517,12 +3519,6 @@ repeat: } /* - * Put the reference grabbed by send_mayday(). @pool won't - * go away while we're still attached to it. - */ - put_pwq(pwq); - - /* * Leave this pool. Notify regular workers; otherwise, we end up * with 0 concurrency and stalling the execution. */ @@ -3532,6 +3528,12 @@ repeat: worker_detach_from_pool(rescuer); + /* + * Put the reference grabbed by send_mayday(). @pool might + * go away any time after it. + */ + put_pwq_unlocked(pwq); + raw_spin_lock_irq(&wq_mayday_lock); } diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 9ec806f989f2..65f550cb5081 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1428,6 +1428,8 @@ static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec, struct iovec *iov = *iovp; ssize_t ret; + *iovp = NULL; + if (compat) ret = copy_compat_iovec_from_user(iov, uvec, 1); else @@ -1438,7 +1440,6 @@ static ssize_t __import_iovec_ubuf(int type, const struct iovec __user *uvec, ret = import_ubuf(type, iov->iov_base, iov->iov_len, i); if (unlikely(ret)) return ret; - *iovp = NULL; return i->count; } diff --git a/lib/rcuref.c b/lib/rcuref.c index 97f300eca927..5bd726b71e39 100644 --- a/lib/rcuref.c +++ b/lib/rcuref.c @@ -220,6 +220,7 @@ EXPORT_SYMBOL_GPL(rcuref_get_slowpath); /** * rcuref_put_slowpath - Slowpath of __rcuref_put() * @ref: Pointer to the reference count + * @cnt: The resulting value of the fastpath decrement * * Invoked when the reference count is outside of the valid zone. * @@ -233,10 +234,8 @@ EXPORT_SYMBOL_GPL(rcuref_get_slowpath); * with a concurrent get()/put() pair. Caller is not allowed to * deconstruct the protected object. */ -bool rcuref_put_slowpath(rcuref_t *ref) +bool rcuref_put_slowpath(rcuref_t *ref, unsigned int cnt) { - unsigned int cnt = atomic_read(&ref->refcnt); - /* Did this drop the last reference? */ if (likely(cnt == RCUREF_NOREF)) { /* diff --git a/lib/stackinit_kunit.c b/lib/stackinit_kunit.c index fbe910c9c825..135322592faf 100644 --- a/lib/stackinit_kunit.c +++ b/lib/stackinit_kunit.c @@ -75,8 +75,10 @@ static bool stackinit_range_contains(char *haystack_start, size_t haystack_size, */ #ifdef CONFIG_M68K #define FILL_SIZE_STRING 8 +#define FILL_SIZE_ARRAY 2 #else #define FILL_SIZE_STRING 16 +#define FILL_SIZE_ARRAY 8 #endif #define INIT_CLONE_SCALAR /**/ @@ -345,11 +347,11 @@ union test_small_start { short three; unsigned long four; struct big_struct { - unsigned long array[8]; + unsigned long array[FILL_SIZE_ARRAY]; } big; }; -/* Mismatched sizes, with one and two being small */ +/* Mismatched sizes, with three and four being small */ union test_small_end { short one; unsigned long two; diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 6932a26f4927..0e865bab4a10 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1418,7 +1418,7 @@ static noinline void check_pause(struct xarray *xa) { XA_STATE(xas, xa, 0); void *entry; - unsigned int order; + int order; unsigned long index = 1; unsigned int count = 0; @@ -1450,7 +1450,7 @@ static noinline void check_pause(struct xarray *xa) xa_destroy(xa); index = 0; - for (order = XA_CHUNK_SHIFT; order > 0; order--) { + for (order = order_limit - 1; order >= 0; order--) { XA_BUG_ON(xa, xa_store_order(xa, index, order, xa_mk_index(index), GFP_KERNEL)); index += 1UL << order; @@ -1462,24 +1462,25 @@ static noinline void check_pause(struct xarray *xa) rcu_read_lock(); xas_for_each(&xas, entry, ULONG_MAX) { XA_BUG_ON(xa, entry != xa_mk_index(index)); - index += 1UL << (XA_CHUNK_SHIFT - count); + index += 1UL << (order_limit - count - 1); count++; } rcu_read_unlock(); - XA_BUG_ON(xa, count != XA_CHUNK_SHIFT); + XA_BUG_ON(xa, count != order_limit); index = 0; count = 0; - xas_set(&xas, XA_CHUNK_SIZE / 2 + 1); + /* test unaligned index */ + xas_set(&xas, 1 % (1UL << (order_limit - 1))); rcu_read_lock(); xas_for_each(&xas, entry, ULONG_MAX) { XA_BUG_ON(xa, entry != xa_mk_index(index)); - index += 1UL << (XA_CHUNK_SHIFT - count); + index += 1UL << (order_limit - count - 1); count++; xas_pause(&xas); } rcu_read_unlock(); - XA_BUG_ON(xa, count != XA_CHUNK_SHIFT); + XA_BUG_ON(xa, count != order_limit); xa_destroy(xa); diff --git a/mm/filemap.c b/mm/filemap.c index 804d7365680c..d4564a79eb35 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -445,7 +445,7 @@ EXPORT_SYMBOL(filemap_fdatawrite_range); * filemap_fdatawrite_range_kick - start writeback on a range * @mapping: target address_space * @start: index to start writeback on - * @end: last (non-inclusive) index for writeback + * @end: last (inclusive) index for writeback * * This is a non-integrity writeback helper, to start writing back folios * for the indicated range. diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 65068671e460..09d6673040ed 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3145,7 +3145,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) /* do node specific alloc */ if (nid != NUMA_NO_NODE) { - m = memblock_alloc_try_nid_raw(huge_page_size(h), huge_page_size(h), + m = memblock_alloc_exact_nid_raw(huge_page_size(h), huge_page_size(h), 0, MEMBLOCK_ALLOC_ACCESSIBLE, nid); if (!m) return 0; @@ -5447,7 +5447,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, if (src_ptl != dst_ptl) spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); - pte = huge_ptep_get_and_clear(mm, old_addr, src_pte); + pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz); if (need_clear_uffd_wp && pte_marker_uffd_wp(pte)) huge_pte_clear(mm, new_addr, dst_pte, sz); @@ -5622,7 +5622,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED); } - pte = huge_ptep_get_and_clear(mm, address, ptep); + pte = huge_ptep_get_and_clear(mm, address, ptep, sz); tlb_remove_huge_tlb_entry(h, tlb, ptep, address); if (huge_pte_dirty(pte)) set_page_dirty(page); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 3fe77a360f1c..8357e1a33699 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -370,6 +370,36 @@ static inline bool init_task_stack_addr(const void *addr) sizeof(init_thread_union.stack)); } +/* + * This function is invoked with report_lock (a raw_spinlock) held. A + * PREEMPT_RT kernel cannot call find_vm_area() as it will acquire a sleeping + * rt_spinlock. + * + * For !RT kernel, the PROVE_RAW_LOCK_NESTING config option will print a + * lockdep warning for this raw_spinlock -> spinlock dependency. This config + * option is enabled by default to ensure better test coverage to expose this + * kind of RT kernel problem. This lockdep splat, however, can be suppressed + * by using DEFINE_WAIT_OVERRIDE_MAP() if it serves a useful purpose and the + * invalid PREEMPT_RT case has been taken care of. + */ +static inline struct vm_struct *kasan_find_vm_area(void *addr) +{ + static DEFINE_WAIT_OVERRIDE_MAP(vmalloc_map, LD_WAIT_SLEEP); + struct vm_struct *va; + + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + return NULL; + + /* + * Suppress lockdep warning and fetch vmalloc area of the + * offending address. + */ + lock_map_acquire_try(&vmalloc_map); + va = find_vm_area(addr); + lock_map_release(&vmalloc_map); + return va; +} + static void print_address_description(void *addr, u8 tag, struct kasan_report_info *info) { @@ -399,7 +429,7 @@ static void print_address_description(void *addr, u8 tag, } if (is_vmalloc_addr(addr)) { - struct vm_struct *va = find_vm_area(addr); + struct vm_struct *va = kasan_find_vm_area(addr); if (va) { pr_err("The buggy address belongs to the virtual mapping at\n" @@ -409,6 +439,8 @@ static void print_address_description(void *addr, u8 tag, pr_err("\n"); page = vmalloc_to_page(addr); + } else { + pr_err("The buggy address %px belongs to a vmalloc virtual mapping\n", addr); } } diff --git a/mm/madvise.c b/mm/madvise.c index 49f3a75046f6..08b207f8e61e 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -933,7 +933,16 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, */ end = vma->vm_end; } - VM_WARN_ON(start >= end); + /* + * If the memory region between start and end was + * originally backed by 4kB pages and then remapped to + * be backed by hugepages while mmap_lock was dropped, + * the adjustment for hugetlb vma above may have rounded + * end down to the start address. + */ + if (start == end) + return 0; + VM_WARN_ON(start > end); } if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 46f8b372d212..4de6acb9b8ec 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4166,6 +4166,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, memcg_memory_event(memcg, MEMCG_OOM); if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0)) break; + cond_resched(); } memcg_wb_domain_size_changed(memcg); diff --git a/mm/memory.c b/mm/memory.c index 539c0f7c6d54..b4d3d4893267 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1719,7 +1719,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb, pmd_t pmdval; unsigned long start = addr; bool can_reclaim_pt = reclaim_pt_is_enabled(start, end, details); - bool direct_reclaim = false; + bool direct_reclaim = true; int nr; retry: @@ -1734,8 +1734,10 @@ retry: do { bool any_skipped = false; - if (need_resched()) + if (need_resched()) { + direct_reclaim = false; break; + } nr = do_zap_pte_range(tlb, vma, pte, addr, end, details, rss, &force_flush, &force_break, &any_skipped); @@ -1743,11 +1745,20 @@ retry: can_reclaim_pt = false; if (unlikely(force_break)) { addr += nr * PAGE_SIZE; + direct_reclaim = false; break; } } while (pte += nr, addr += PAGE_SIZE * nr, addr != end); - if (can_reclaim_pt && addr == end) + /* + * Fast path: try to hold the pmd lock and unmap the PTE page. + * + * If the pte lock was released midway (retry case), or if the attempt + * to hold the pmd lock failed, then we need to recheck all pte entries + * to ensure they are still none, thereby preventing the pte entries + * from being repopulated by another thread. + */ + if (can_reclaim_pt && direct_reclaim && addr == end) direct_reclaim = try_get_and_clear_pmd(mm, pmd, &pmdval); add_mm_rss_vec(mm, rss); diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 9cf26592ac93..5bd888223cc8 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -840,20 +840,15 @@ void migrate_device_finalize(unsigned long *src_pfns, dst = src; } + if (!folio_is_zone_device(dst)) + folio_add_lru(dst); remove_migration_ptes(src, dst, 0); folio_unlock(src); - - if (folio_is_zone_device(src)) - folio_put(src); - else - folio_putback_lru(src); + folio_put(src); if (dst != src) { folio_unlock(dst); - if (folio_is_zone_device(dst)) - folio_put(dst); - else - folio_putback_lru(dst); + folio_put(dst); } } } diff --git a/mm/truncate.c b/mm/truncate.c index e2e115adfbc5..76d8fcd89bd0 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -548,8 +548,6 @@ int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); - if (folio_test_dirty(folio)) - return 0; if (folio_mapped(folio)) unmap_mapping_folio(folio); BUG_ON(folio_mapped(folio)); diff --git a/mm/zswap.c b/mm/zswap.c index 6504174fbc6a..ac9d299e7d0c 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1445,9 +1445,9 @@ resched: * main API **********************************/ -static ssize_t zswap_store_page(struct page *page, - struct obj_cgroup *objcg, - struct zswap_pool *pool) +static bool zswap_store_page(struct page *page, + struct obj_cgroup *objcg, + struct zswap_pool *pool) { swp_entry_t page_swpentry = page_swap_entry(page); struct zswap_entry *entry, *old; @@ -1456,7 +1456,7 @@ static ssize_t zswap_store_page(struct page *page, entry = zswap_entry_cache_alloc(GFP_KERNEL, page_to_nid(page)); if (!entry) { zswap_reject_kmemcache_fail++; - return -EINVAL; + return false; } if (!zswap_compress(page, entry, pool)) @@ -1483,13 +1483,17 @@ static ssize_t zswap_store_page(struct page *page, /* * The entry is successfully compressed and stored in the tree, there is - * no further possibility of failure. Grab refs to the pool and objcg. - * These refs will be dropped by zswap_entry_free() when the entry is - * removed from the tree. + * no further possibility of failure. Grab refs to the pool and objcg, + * charge zswap memory, and increment zswap_stored_pages. + * The opposite actions will be performed by zswap_entry_free() + * when the entry is removed from the tree. */ zswap_pool_get(pool); - if (objcg) + if (objcg) { obj_cgroup_get(objcg); + obj_cgroup_charge_zswap(objcg, entry->length); + } + atomic_long_inc(&zswap_stored_pages); /* * We finish initializing the entry while it's already in xarray. @@ -1510,13 +1514,13 @@ static ssize_t zswap_store_page(struct page *page, zswap_lru_add(&zswap_list_lru, entry); } - return entry->length; + return true; store_failed: zpool_free(pool->zpool, entry->handle); compress_failed: zswap_entry_cache_free(entry); - return -EINVAL; + return false; } bool zswap_store(struct folio *folio) @@ -1526,7 +1530,6 @@ bool zswap_store(struct folio *folio) struct obj_cgroup *objcg = NULL; struct mem_cgroup *memcg = NULL; struct zswap_pool *pool; - size_t compressed_bytes = 0; bool ret = false; long index; @@ -1564,20 +1567,14 @@ bool zswap_store(struct folio *folio) for (index = 0; index < nr_pages; ++index) { struct page *page = folio_page(folio, index); - ssize_t bytes; - bytes = zswap_store_page(page, objcg, pool); - if (bytes < 0) + if (!zswap_store_page(page, objcg, pool)) goto put_pool; - compressed_bytes += bytes; } - if (objcg) { - obj_cgroup_charge_zswap(objcg, compressed_bytes); + if (objcg) count_objcg_events(objcg, ZSWPOUT, nr_pages); - } - atomic_long_add(nr_pages, &zswap_stored_pages); count_vm_events(ZSWPOUT, nr_pages); ret = true; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index aa6c714892ec..9f3b8b682adb 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -685,6 +685,15 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } + if (ax25->ax25_dev) { + if (dev == ax25->ax25_dev->dev) { + rcu_read_unlock(); + break; + } + netdev_put(ax25->ax25_dev->dev, &ax25->dev_tracker); + ax25_dev_put(ax25->ax25_dev); + } + ax25->ax25_dev = ax25_dev_ax25dev(dev); if (!ax25->ax25_dev) { rcu_read_unlock(); @@ -692,6 +701,8 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; } ax25_fillin_cb(ax25, ax25->ax25_dev); + netdev_hold(dev, &ax25->dev_tracker, GFP_ATOMIC); + ax25_dev_hold(ax25->ax25_dev); rcu_read_unlock(); break; diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index ac11f1f08db0..d35479c465e2 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -113,8 +113,6 @@ static void batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) { ewma_throughput_init(&hardif_neigh->bat_v.throughput); - INIT_WORK(&hardif_neigh->bat_v.metric_work, - batadv_v_elp_throughput_metric_update); } /** diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index 1d704574e6bf..b065578b4436 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -18,6 +18,7 @@ #include <linux/if_ether.h> #include <linux/jiffies.h> #include <linux/kref.h> +#include <linux/list.h> #include <linux/minmax.h> #include <linux/netdevice.h> #include <linux/nl80211.h> @@ -26,6 +27,7 @@ #include <linux/rcupdate.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> +#include <linux/slab.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> @@ -42,6 +44,18 @@ #include "send.h" /** + * struct batadv_v_metric_queue_entry - list of hardif neighbors which require + * and metric update + */ +struct batadv_v_metric_queue_entry { + /** @hardif_neigh: hardif neighbor scheduled for metric update */ + struct batadv_hardif_neigh_node *hardif_neigh; + + /** @list: list node for metric_queue */ + struct list_head list; +}; + +/** * batadv_v_elp_start_timer() - restart timer for ELP periodic work * @hard_iface: the interface for which the timer has to be reset */ @@ -59,25 +73,36 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface) /** * batadv_v_elp_get_throughput() - get the throughput towards a neighbour * @neigh: the neighbour for which the throughput has to be obtained + * @pthroughput: calculated throughput towards the given neighbour in multiples + * of 100kpbs (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc). * - * Return: The throughput towards the given neighbour in multiples of 100kpbs - * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc). + * Return: true when value behind @pthroughput was set */ -static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) +static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh, + u32 *pthroughput) { struct batadv_hard_iface *hard_iface = neigh->if_incoming; + struct net_device *soft_iface = hard_iface->soft_iface; struct ethtool_link_ksettings link_settings; struct net_device *real_netdev; struct station_info sinfo; u32 throughput; int ret; + /* don't query throughput when no longer associated with any + * batman-adv interface + */ + if (!soft_iface) + return false; + /* if the user specified a customised value for this interface, then * return it directly */ throughput = atomic_read(&hard_iface->bat_v.throughput_override); - if (throughput != 0) - return throughput; + if (throughput != 0) { + *pthroughput = throughput; + return true; + } /* if this is a wireless device, then ask its throughput through * cfg80211 API @@ -104,27 +129,39 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) * possible to delete this neighbor. For now set * the throughput metric to 0. */ - return 0; + *pthroughput = 0; + return true; } if (ret) goto default_throughput; - if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) - return sinfo.expected_throughput / 100; + if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) { + *pthroughput = sinfo.expected_throughput / 100; + return true; + } /* try to estimate the expected throughput based on reported tx * rates */ - if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) - return cfg80211_calculate_bitrate(&sinfo.txrate) / 3; + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) { + *pthroughput = cfg80211_calculate_bitrate(&sinfo.txrate) / 3; + return true; + } goto default_throughput; } + /* only use rtnl_trylock because the elp worker will be cancelled while + * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise + * wait forever when the elp work_item was started and it is then also + * trying to rtnl_lock + */ + if (!rtnl_trylock()) + return false; + /* if not a wifi interface, check if this device provides data via * ethtool (e.g. an Ethernet adapter) */ - rtnl_lock(); ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings); rtnl_unlock(); if (ret == 0) { @@ -135,13 +172,15 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX; throughput = link_settings.base.speed; - if (throughput && throughput != SPEED_UNKNOWN) - return throughput * 10; + if (throughput && throughput != SPEED_UNKNOWN) { + *pthroughput = throughput * 10; + return true; + } } default_throughput: if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) { - batadv_info(hard_iface->soft_iface, + batadv_info(soft_iface, "WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. Consider overriding the throughput manually or checking your driver.\n", hard_iface->net_dev->name, BATADV_THROUGHPUT_DEFAULT_VALUE / 10, @@ -150,31 +189,26 @@ default_throughput: } /* if none of the above cases apply, return the base_throughput */ - return BATADV_THROUGHPUT_DEFAULT_VALUE; + *pthroughput = BATADV_THROUGHPUT_DEFAULT_VALUE; + return true; } /** * batadv_v_elp_throughput_metric_update() - worker updating the throughput * metric of a single hop neighbour - * @work: the work queue item + * @neigh: the neighbour to probe */ -void batadv_v_elp_throughput_metric_update(struct work_struct *work) +static void +batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh) { - struct batadv_hardif_neigh_node_bat_v *neigh_bat_v; - struct batadv_hardif_neigh_node *neigh; - - neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v, - metric_work); - neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node, - bat_v); + u32 throughput; + bool valid; - ewma_throughput_add(&neigh->bat_v.throughput, - batadv_v_elp_get_throughput(neigh)); + valid = batadv_v_elp_get_throughput(neigh, &throughput); + if (!valid) + return; - /* decrement refcounter to balance increment performed before scheduling - * this task - */ - batadv_hardif_neigh_put(neigh); + ewma_throughput_add(&neigh->bat_v.throughput, throughput); } /** @@ -248,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct batadv_hardif_neigh_node *neigh) */ static void batadv_v_elp_periodic_work(struct work_struct *work) { + struct batadv_v_metric_queue_entry *metric_entry; + struct batadv_v_metric_queue_entry *metric_safe; struct batadv_hardif_neigh_node *hardif_neigh; struct batadv_hard_iface *hard_iface; struct batadv_hard_iface_bat_v *bat_v; struct batadv_elp_packet *elp_packet; + struct list_head metric_queue; struct batadv_priv *bat_priv; struct sk_buff *skb; u32 elp_interval; - bool ret; bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work); hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v); @@ -291,6 +327,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) atomic_inc(&hard_iface->bat_v.elp_seqno); + INIT_LIST_HEAD(&metric_queue); + /* The throughput metric is updated on each sent packet. This way, if a * node is dead and no longer sends packets, batman-adv is still able to * react timely to its death. @@ -315,16 +353,28 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) /* Reading the estimated throughput from cfg80211 is a task that * may sleep and that is not allowed in an rcu protected - * context. Therefore schedule a task for that. + * context. Therefore add it to metric_queue and process it + * outside rcu protected context. */ - ret = queue_work(batadv_event_workqueue, - &hardif_neigh->bat_v.metric_work); - - if (!ret) + metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC); + if (!metric_entry) { batadv_hardif_neigh_put(hardif_neigh); + continue; + } + + metric_entry->hardif_neigh = hardif_neigh; + list_add(&metric_entry->list, &metric_queue); } rcu_read_unlock(); + list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) { + batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh); + + batadv_hardif_neigh_put(metric_entry->hardif_neigh); + list_del(&metric_entry->list); + kfree(metric_entry); + } + restart_timer: batadv_v_elp_start_timer(hard_iface); out: diff --git a/net/batman-adv/bat_v_elp.h b/net/batman-adv/bat_v_elp.h index 9e2740195fa2..c9cb0a307100 100644 --- a/net/batman-adv/bat_v_elp.h +++ b/net/batman-adv/bat_v_elp.h @@ -10,7 +10,6 @@ #include "main.h" #include <linux/skbuff.h> -#include <linux/workqueue.h> int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface); void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface); @@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct batadv_hard_iface *primary_iface, void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface); int batadv_v_elp_packet_recv(struct sk_buff *skb, struct batadv_hard_iface *if_incoming); -void batadv_v_elp_throughput_metric_update(struct work_struct *work); #endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */ diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 3c0a14a582e4..d4b71d34310f 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -3937,23 +3937,21 @@ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tt_data; u16 num_entries, num_vlan; - size_t flex_size; + size_t tt_data_sz; if (tvlv_value_len < sizeof(*tt_data)) return; tt_data = tvlv_value; - tvlv_value_len -= sizeof(*tt_data); - num_vlan = ntohs(tt_data->num_vlan); - flex_size = flex_array_size(tt_data, vlan_data, num_vlan); - if (tvlv_value_len < flex_size) + tt_data_sz = struct_size(tt_data, vlan_data, num_vlan); + if (tvlv_value_len < tt_data_sz) return; tt_change = (struct batadv_tvlv_tt_change *)((void *)tt_data - + flex_size); - tvlv_value_len -= flex_size; + + tt_data_sz); + tvlv_value_len -= tt_data_sz; num_entries = batadv_tt_entries(tvlv_value_len); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index f491bff8c51b..fe89f08533fe 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v { * neighbor */ unsigned long last_unicast_tx; - - /** @metric_work: work queue callback item for metric update */ - struct work_struct metric_work; }; /** diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig index 6746be07e222..e08aae35351a 100644 --- a/net/bluetooth/hidp/Kconfig +++ b/net/bluetooth/hidp/Kconfig @@ -1,8 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config BT_HIDP tristate "HIDP protocol support" - depends on BT_BREDR && INPUT && HID_SUPPORT - select HID + depends on BT_BREDR && HID help HIDP (Human Interface Device Protocol) is a transport layer for HID reports. HIDP is required for the Bluetooth Human diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 27b4c4a2ba1f..b22078b67972 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -119,7 +119,6 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, { struct l2cap_chan *c; - mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_scid(conn, cid); if (c) { /* Only lock if chan reference is not 0 */ @@ -127,7 +126,6 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, if (c) l2cap_chan_lock(c); } - mutex_unlock(&conn->chan_lock); return c; } @@ -140,7 +138,6 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, { struct l2cap_chan *c; - mutex_lock(&conn->chan_lock); c = __l2cap_get_chan_by_dcid(conn, cid); if (c) { /* Only lock if chan reference is not 0 */ @@ -148,7 +145,6 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, if (c) l2cap_chan_lock(c); } - mutex_unlock(&conn->chan_lock); return c; } @@ -418,7 +414,7 @@ static void l2cap_chan_timeout(struct work_struct *work) if (!conn) return; - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling * this work. No need to call l2cap_chan_hold(chan) here again. */ @@ -439,7 +435,7 @@ static void l2cap_chan_timeout(struct work_struct *work) l2cap_chan_unlock(chan); l2cap_chan_put(chan); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } struct l2cap_chan *l2cap_chan_create(void) @@ -636,14 +632,15 @@ void __l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) test_bit(FLAG_HOLD_HCI_CONN, &chan->flags)) hci_conn_hold(conn->hcon); - list_add(&chan->list, &conn->chan_l); + /* Append to the list since the order matters for ECRED */ + list_add_tail(&chan->list, &conn->chan_l); } void l2cap_chan_add(struct l2cap_conn *conn, struct l2cap_chan *chan) { - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); __l2cap_chan_add(conn, chan); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } void l2cap_chan_del(struct l2cap_chan *chan, int err) @@ -731,9 +728,9 @@ void l2cap_chan_list(struct l2cap_conn *conn, l2cap_chan_func_t func, if (!conn) return; - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); __l2cap_chan_list(conn, func, data); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } EXPORT_SYMBOL_GPL(l2cap_chan_list); @@ -745,7 +742,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work) struct hci_conn *hcon = conn->hcon; struct l2cap_chan *chan; - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); @@ -754,7 +751,7 @@ static void l2cap_conn_update_id_addr(struct work_struct *work) l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } static void l2cap_chan_le_connect_reject(struct l2cap_chan *chan) @@ -948,6 +945,16 @@ static u8 l2cap_get_ident(struct l2cap_conn *conn) return id; } +static void l2cap_send_acl(struct l2cap_conn *conn, struct sk_buff *skb, + u8 flags) +{ + /* Check if the hcon still valid before attempting to send */ + if (hci_conn_valid(conn->hcon->hdev, conn->hcon)) + hci_send_acl(conn->hchan, skb, flags); + else + kfree_skb(skb); +} + static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data) { @@ -970,7 +977,7 @@ static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, bt_cb(skb)->force_active = BT_POWER_FORCE_ACTIVE_ON; skb->priority = HCI_PRIO_MAX; - hci_send_acl(conn->hchan, skb, flags); + l2cap_send_acl(conn, skb, flags); } static void l2cap_do_send(struct l2cap_chan *chan, struct sk_buff *skb) @@ -1497,8 +1504,6 @@ static void l2cap_conn_start(struct l2cap_conn *conn) BT_DBG("conn %p", conn); - mutex_lock(&conn->chan_lock); - list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { l2cap_chan_lock(chan); @@ -1567,8 +1572,6 @@ static void l2cap_conn_start(struct l2cap_conn *conn) l2cap_chan_unlock(chan); } - - mutex_unlock(&conn->chan_lock); } static void l2cap_le_conn_ready(struct l2cap_conn *conn) @@ -1614,7 +1617,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) if (hcon->type == ACL_LINK) l2cap_request_info(conn); - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { @@ -1632,7 +1635,7 @@ static void l2cap_conn_ready(struct l2cap_conn *conn) l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); if (hcon->type == LE_LINK) l2cap_le_conn_ready(conn); @@ -1647,14 +1650,10 @@ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) BT_DBG("conn %p", conn); - mutex_lock(&conn->chan_lock); - list_for_each_entry(chan, &conn->chan_l, list) { if (test_bit(FLAG_FORCE_RELIABLE, &chan->flags)) l2cap_chan_set_err(chan, err); } - - mutex_unlock(&conn->chan_lock); } static void l2cap_info_timeout(struct work_struct *work) @@ -1665,7 +1664,9 @@ static void l2cap_info_timeout(struct work_struct *work) conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; conn->info_ident = 0; + mutex_lock(&conn->lock); l2cap_conn_start(conn); + mutex_unlock(&conn->lock); } /* @@ -1757,6 +1758,8 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); + mutex_lock(&conn->lock); + kfree_skb(conn->rx_skb); skb_queue_purge(&conn->pending_rx); @@ -1775,8 +1778,6 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) /* Force the connection to be immediately dropped */ hcon->disc_timeout = 0; - mutex_lock(&conn->chan_lock); - /* Kill channels */ list_for_each_entry_safe(chan, l, &conn->chan_l, list) { l2cap_chan_hold(chan); @@ -1790,15 +1791,14 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) l2cap_chan_put(chan); } - mutex_unlock(&conn->chan_lock); - - hci_chan_del(conn->hchan); - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) cancel_delayed_work_sync(&conn->info_timer); - hcon->l2cap_data = NULL; + hci_chan_del(conn->hchan); conn->hchan = NULL; + + hcon->l2cap_data = NULL; + mutex_unlock(&conn->lock); l2cap_conn_put(conn); } @@ -2916,8 +2916,6 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) BT_DBG("conn %p", conn); - mutex_lock(&conn->chan_lock); - list_for_each_entry(chan, &conn->chan_l, list) { if (chan->chan_type != L2CAP_CHAN_RAW) continue; @@ -2932,8 +2930,6 @@ static void l2cap_raw_recv(struct l2cap_conn *conn, struct sk_buff *skb) if (chan->ops->recv(chan, nskb)) kfree_skb(nskb); } - - mutex_unlock(&conn->chan_lock); } /* ---- L2CAP signalling commands ---- */ @@ -3776,7 +3772,11 @@ static void l2cap_ecred_rsp_defer(struct l2cap_chan *chan, void *data) struct l2cap_ecred_conn_rsp *rsp_flex = container_of(&rsp->pdu.rsp, struct l2cap_ecred_conn_rsp, hdr); - if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags)) + /* Check if channel for outgoing connection or if it wasn't deferred + * since in those cases it must be skipped. + */ + if (test_bit(FLAG_ECRED_CONN_REQ_SENT, &chan->flags) || + !test_and_clear_bit(FLAG_DEFER_SETUP, &chan->flags)) return; /* Reset ident so only one response is sent */ @@ -3952,7 +3952,6 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, goto response; } - mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); /* Check if the ACL is secure enough (if not SDP) */ @@ -4059,7 +4058,6 @@ response: } l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); } @@ -4098,27 +4096,19 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x", dcid, scid, result, status); - mutex_lock(&conn->chan_lock); - if (scid) { chan = __l2cap_get_chan_by_scid(conn, scid); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; } else { chan = __l2cap_get_chan_by_ident(conn, cmd->ident); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; } chan = l2cap_chan_hold_unless_zero(chan); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; err = 0; @@ -4156,9 +4146,6 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, l2cap_chan_unlock(chan); l2cap_chan_put(chan); -unlock: - mutex_unlock(&conn->chan_lock); - return err; } @@ -4446,11 +4433,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, chan->ops->set_shutdown(chan); - l2cap_chan_unlock(chan); - mutex_lock(&conn->chan_lock); - l2cap_chan_lock(chan); l2cap_chan_del(chan, ECONNRESET); - mutex_unlock(&conn->chan_lock); chan->ops->close(chan); @@ -4487,11 +4470,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, return 0; } - l2cap_chan_unlock(chan); - mutex_lock(&conn->chan_lock); - l2cap_chan_lock(chan); l2cap_chan_del(chan, 0); - mutex_unlock(&conn->chan_lock); chan->ops->close(chan); @@ -4689,13 +4668,9 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, BT_DBG("dcid 0x%4.4x mtu %u mps %u credits %u result 0x%2.2x", dcid, mtu, mps, credits, result); - mutex_lock(&conn->chan_lock); - chan = __l2cap_get_chan_by_ident(conn, cmd->ident); - if (!chan) { - err = -EBADSLT; - goto unlock; - } + if (!chan) + return -EBADSLT; err = 0; @@ -4743,9 +4718,6 @@ static int l2cap_le_connect_rsp(struct l2cap_conn *conn, l2cap_chan_unlock(chan); -unlock: - mutex_unlock(&conn->chan_lock); - return err; } @@ -4857,7 +4829,6 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, goto response; } - mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); if (!smp_sufficient_security(conn->hcon, pchan->sec_level, @@ -4923,7 +4894,6 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn, response_unlock: l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); if (result == L2CAP_CR_PEND) @@ -5057,7 +5027,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, goto response; } - mutex_lock(&conn->chan_lock); l2cap_chan_lock(pchan); if (!smp_sufficient_security(conn->hcon, pchan->sec_level, @@ -5132,7 +5101,6 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn, unlock: l2cap_chan_unlock(pchan); - mutex_unlock(&conn->chan_lock); l2cap_chan_put(pchan); response: @@ -5169,8 +5137,6 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, BT_DBG("mtu %u mps %u credits %u result 0x%4.4x", mtu, mps, credits, result); - mutex_lock(&conn->chan_lock); - cmd_len -= sizeof(*rsp); list_for_each_entry_safe(chan, tmp, &conn->chan_l, list) { @@ -5256,8 +5222,6 @@ static inline int l2cap_ecred_conn_rsp(struct l2cap_conn *conn, l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); - return err; } @@ -5370,8 +5334,6 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn, if (cmd_len < sizeof(*rej)) return -EPROTO; - mutex_lock(&conn->chan_lock); - chan = __l2cap_get_chan_by_ident(conn, cmd->ident); if (!chan) goto done; @@ -5386,7 +5348,6 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn, l2cap_chan_put(chan); done: - mutex_unlock(&conn->chan_lock); return 0; } @@ -6841,8 +6802,12 @@ static void process_pending_rx(struct work_struct *work) BT_DBG(""); + mutex_lock(&conn->lock); + while ((skb = skb_dequeue(&conn->pending_rx))) l2cap_recv_frame(conn, skb); + + mutex_unlock(&conn->lock); } static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) @@ -6881,7 +6846,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); - mutex_init(&conn->chan_lock); + mutex_init(&conn->lock); INIT_LIST_HEAD(&conn->chan_l); INIT_LIST_HEAD(&conn->users); @@ -7072,7 +7037,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, } } - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); l2cap_chan_lock(chan); if (cid && __l2cap_get_chan_by_dcid(conn, cid)) { @@ -7113,7 +7078,7 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid, chan_unlock: l2cap_chan_unlock(chan); - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); done: hci_dev_unlock(hdev); hci_dev_put(hdev); @@ -7325,7 +7290,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) BT_DBG("conn %p status 0x%2.2x encrypt %u", conn, status, encrypt); - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); list_for_each_entry(chan, &conn->chan_l, list) { l2cap_chan_lock(chan); @@ -7399,7 +7364,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) l2cap_chan_unlock(chan); } - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); } /* Append fragment into frame respecting the maximum len of rx_skb */ @@ -7466,19 +7431,45 @@ static void l2cap_recv_reset(struct l2cap_conn *conn) conn->rx_len = 0; } +struct l2cap_conn *l2cap_conn_hold_unless_zero(struct l2cap_conn *c) +{ + if (!c) + return NULL; + + BT_DBG("conn %p orig refcnt %u", c, kref_read(&c->ref)); + + if (!kref_get_unless_zero(&c->ref)) + return NULL; + + return c; +} + void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) { - struct l2cap_conn *conn = hcon->l2cap_data; + struct l2cap_conn *conn; int len; + /* Lock hdev to access l2cap_data to avoid race with l2cap_conn_del */ + hci_dev_lock(hcon->hdev); + + conn = hcon->l2cap_data; + if (!conn) conn = l2cap_conn_add(hcon); - if (!conn) - goto drop; + conn = l2cap_conn_hold_unless_zero(conn); + + hci_dev_unlock(hcon->hdev); + + if (!conn) { + kfree_skb(skb); + return; + } BT_DBG("conn %p len %u flags 0x%x", conn, skb->len, flags); + mutex_lock(&conn->lock); + switch (flags) { case ACL_START: case ACL_START_NO_FLUSH: @@ -7503,7 +7494,7 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) if (len == skb->len) { /* Complete frame received */ l2cap_recv_frame(conn, skb); - return; + goto unlock; } BT_DBG("Start: total len %d, frag len %u", len, skb->len); @@ -7567,6 +7558,9 @@ void l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 flags) drop: kfree_skb(skb); +unlock: + mutex_unlock(&conn->lock); + l2cap_conn_put(conn); } static struct hci_cb l2cap_cb = { diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 46ea0bee2259..acd11b268b98 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1326,9 +1326,10 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) /* prevent sk structure from being freed whilst unlocked */ sock_hold(sk); - chan = l2cap_pi(sk)->chan; /* prevent chan structure from being freed whilst unlocked */ - l2cap_chan_hold(chan); + chan = l2cap_chan_hold_unless_zero(l2cap_pi(sk)->chan); + if (!chan) + goto shutdown_already; BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); @@ -1358,22 +1359,20 @@ static int l2cap_sock_shutdown(struct socket *sock, int how) release_sock(sk); l2cap_chan_lock(chan); - conn = chan->conn; - if (conn) - /* prevent conn structure from being freed */ - l2cap_conn_get(conn); + /* prevent conn structure from being freed */ + conn = l2cap_conn_hold_unless_zero(chan->conn); l2cap_chan_unlock(chan); if (conn) /* mutex lock must be taken before l2cap_chan_lock() */ - mutex_lock(&conn->chan_lock); + mutex_lock(&conn->lock); l2cap_chan_lock(chan); l2cap_chan_close(chan, 0); l2cap_chan_unlock(chan); if (conn) { - mutex_unlock(&conn->chan_lock); + mutex_unlock(&conn->lock); l2cap_conn_put(conn); } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 8f6f7db48d4e..7cb192cbd65f 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -660,12 +660,9 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, void __user *data_in = u64_to_user_ptr(kattr->test.data_in); void *data; - if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) + if (user_size < ETH_HLEN || user_size > PAGE_SIZE - headroom - tailroom) return ERR_PTR(-EINVAL); - if (user_size > size) - return ERR_PTR(-EMSGSIZE); - size = SKB_DATA_ALIGN(size); data = kzalloc(size + headroom + tailroom, GFP_USER); if (!data) diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 305dd72c844c..17226b2341d0 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -1132,7 +1132,7 @@ static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk, todo_size = size; - while (todo_size) { + do { struct j1939_sk_buff_cb *skcb; segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE, @@ -1177,7 +1177,7 @@ static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk, todo_size -= segment_size; session->total_queued_size += segment_size; - } + } while (todo_size); switch (ret) { case 0: /* OK */ diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index 95f7a7e65a73..9b72d118d756 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -382,8 +382,9 @@ sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session, skb_queue_walk(&session->skb_queue, do_skb) { do_skcb = j1939_skb_to_cb(do_skb); - if (offset_start >= do_skcb->offset && - offset_start < (do_skcb->offset + do_skb->len)) { + if ((offset_start >= do_skcb->offset && + offset_start < (do_skcb->offset + do_skb->len)) || + (offset_start == 0 && do_skcb->offset == 0 && do_skb->len == 0)) { skb = do_skb; } } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 7d41cde1bcca..2e538399757f 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -355,11 +355,6 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = { static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) { - const struct btf *btf_vmlinux; - const struct btf_type *t; - const char *tname; - u32 btf_id; - if (prog->aux->dst_prog) return false; @@ -374,13 +369,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) return true; case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: - btf_vmlinux = bpf_get_btf_vmlinux(); - if (IS_ERR_OR_NULL(btf_vmlinux)) - return false; - btf_id = prog->aux->attach_btf_id; - t = btf_type_by_id(btf_vmlinux, btf_id); - tname = btf_name_by_offset(btf_vmlinux, t->name_off); - return !!strncmp(tname, "bpf_sk_storage", + return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage", strlen("bpf_sk_storage")); default: return false; diff --git a/net/core/dev.c b/net/core/dev.c index c0021cbd28fc..30da277c5a6f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1121,6 +1121,12 @@ out: return ret; } +static bool dev_addr_cmp(struct net_device *dev, unsigned short type, + const char *ha) +{ + return dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len); +} + /** * dev_getbyhwaddr_rcu - find a device by its hardware address * @net: the applicable net namespace @@ -1129,7 +1135,7 @@ out: * * Search for an interface by MAC address. Returns NULL if the device * is not found or a pointer to the device. - * The caller must hold RCU or RTNL. + * The caller must hold RCU. * The returned device has not had its ref count increased * and the caller must therefore be careful about locking * @@ -1141,14 +1147,39 @@ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, struct net_device *dev; for_each_netdev_rcu(net, dev) - if (dev->type == type && - !memcmp(dev->dev_addr, ha, dev->addr_len)) + if (dev_addr_cmp(dev, type, ha)) return dev; return NULL; } EXPORT_SYMBOL(dev_getbyhwaddr_rcu); +/** + * dev_getbyhwaddr() - find a device by its hardware address + * @net: the applicable net namespace + * @type: media type of device + * @ha: hardware address + * + * Similar to dev_getbyhwaddr_rcu(), but the owner needs to hold + * rtnl_lock. + * + * Context: rtnl_lock() must be held. + * Return: pointer to the net_device, or NULL if not found + */ +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, + const char *ha) +{ + struct net_device *dev; + + ASSERT_RTNL(); + for_each_netdev(net, dev) + if (dev_addr_cmp(dev, type, ha)) + return dev; + + return NULL; +} +EXPORT_SYMBOL(dev_getbyhwaddr); + struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) { struct net_device *dev, *ret = NULL; @@ -2070,20 +2101,55 @@ static void __move_netdevice_notifier_net(struct net *src_net, __register_netdevice_notifier_net(dst_net, nb, true); } +static void rtnl_net_dev_lock(struct net_device *dev) +{ + bool again; + + do { + struct net *net; + + again = false; + + /* netns might be being dismantled. */ + rcu_read_lock(); + net = dev_net_rcu(dev); + net_passive_inc(net); + rcu_read_unlock(); + + rtnl_net_lock(net); + +#ifdef CONFIG_NET_NS + /* dev might have been moved to another netns. */ + if (!net_eq(net, rcu_access_pointer(dev->nd_net.net))) { + rtnl_net_unlock(net); + net_passive_dec(net); + again = true; + } +#endif + } while (again); +} + +static void rtnl_net_dev_unlock(struct net_device *dev) +{ + struct net *net = dev_net(dev); + + rtnl_net_unlock(net); + net_passive_dec(net); +} + int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) { - struct net *net = dev_net(dev); int err; - rtnl_net_lock(net); - err = __register_netdevice_notifier_net(net, nb, false); + rtnl_net_dev_lock(dev); + err = __register_netdevice_notifier_net(dev_net(dev), nb, false); if (!err) { nn->nb = nb; list_add(&nn->list, &dev->net_notifier_list); } - rtnl_net_unlock(net); + rtnl_net_dev_unlock(dev); return err; } @@ -2093,13 +2159,12 @@ int unregister_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) { - struct net *net = dev_net(dev); int err; - rtnl_net_lock(net); + rtnl_net_dev_lock(dev); list_del(&nn->list); - err = __unregister_netdevice_notifier_net(net, nb); - rtnl_net_unlock(net); + err = __unregister_netdevice_notifier_net(dev_net(dev), nb); + rtnl_net_dev_unlock(dev); return err; } @@ -4692,7 +4757,7 @@ use_local_napi: * we have to raise NET_RX_SOFTIRQ. */ if (!sd->in_net_rx_action) - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + raise_softirq_irqoff(NET_RX_SOFTIRQ); } #ifdef CONFIG_RPS @@ -6920,6 +6985,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi) list_add_rcu(&napi->dev_list, higher); /* adds after higher */ } +/* Double check that napi_get_frags() allocates skbs with + * skb->head being backed by slab, not a page fragment. + * This is to make sure bug fixed in 3226b158e67c + * ("net: avoid 32 x truesize under-estimation for tiny skbs") + * does not accidentally come back. + */ +static void napi_get_frags_check(struct napi_struct *napi) +{ + struct sk_buff *skb; + + local_bh_disable(); + skb = napi_get_frags(napi); + WARN_ON_ONCE(skb && skb->head_frag); + napi_free_frags(napi); + local_bh_enable(); +} + void netif_napi_add_weight_locked(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), @@ -11286,6 +11368,20 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; const struct net_device_core_stats __percpu *p; + /* + * IPv{4,6} and udp tunnels share common stat helpers and use + * different stat type (NETDEV_PCPU_STAT_TSTATS vs + * NETDEV_PCPU_STAT_DSTATS). Ensure the accounting is consistent. + */ + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_bytes) != + offsetof(struct pcpu_dstats, rx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_packets) != + offsetof(struct pcpu_dstats, rx_packets)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_bytes) != + offsetof(struct pcpu_dstats, tx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_packets) != + offsetof(struct pcpu_dstats, tx_packets)); + if (ops->ndo_get_stats64) { memset(storage, 0, sizeof(*storage)); ops->ndo_get_stats64(dev, storage); @@ -11866,11 +11962,9 @@ EXPORT_SYMBOL(unregister_netdevice_many); */ void unregister_netdev(struct net_device *dev) { - struct net *net = dev_net(dev); - - rtnl_net_lock(net); + rtnl_net_dev_lock(dev); unregister_netdevice(dev); - rtnl_net_unlock(net); + rtnl_net_dev_unlock(dev); } EXPORT_SYMBOL(unregister_netdev); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 6efd4cccc9dd..212f0a048cab 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1734,30 +1734,30 @@ static int __init init_net_drop_monitor(void) return -ENOSPC; } - rc = genl_register_family(&net_drop_monitor_family); - if (rc) { - pr_err("Could not create drop monitor netlink family\n"); - return rc; + for_each_possible_cpu(cpu) { + net_dm_cpu_data_init(cpu); + net_dm_hw_cpu_data_init(cpu); } - WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT); rc = register_netdevice_notifier(&dropmon_net_notifier); if (rc < 0) { pr_crit("Failed to register netdevice notifier\n"); + return rc; + } + + rc = genl_register_family(&net_drop_monitor_family); + if (rc) { + pr_err("Could not create drop monitor netlink family\n"); goto out_unreg; } + WARN_ON(net_drop_monitor_family.mcgrp_offset != NET_DM_GRP_ALERT); rc = 0; - for_each_possible_cpu(cpu) { - net_dm_cpu_data_init(cpu); - net_dm_hw_cpu_data_init(cpu); - } - goto out; out_unreg: - genl_unregister_family(&net_drop_monitor_family); + WARN_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); out: return rc; } @@ -1766,19 +1766,18 @@ static void exit_net_drop_monitor(void) { int cpu; - BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); - /* * Because of the module_get/put we do in the trace state change path * we are guaranteed not to have any current users when we get here */ + BUG_ON(genl_unregister_family(&net_drop_monitor_family)); + + BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); for_each_possible_cpu(cpu) { net_dm_hw_cpu_data_fini(cpu); net_dm_cpu_data_fini(cpu); } - - BUG_ON(genl_unregister_family(&net_drop_monitor_family)); } module_init(init_net_drop_monitor); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e684ba3ebb38..94a7872ab231 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -37,8 +37,8 @@ static const struct fib_kuid_range fib_kuid_range_unset = { bool fib_rule_matchall(const struct fib_rule *rule) { - if (rule->iifindex || rule->oifindex || rule->mark || rule->tun_id || - rule->flags) + if (READ_ONCE(rule->iifindex) || READ_ONCE(rule->oifindex) || + rule->mark || rule->tun_id || rule->flags) return false; if (rule->suppress_ifgroup != -1 || rule->suppress_prefixlen != -1) return false; @@ -261,12 +261,14 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, struct flowi *fl, int flags, struct fib_lookup_arg *arg) { - int ret = 0; + int iifindex, oifindex, ret = 0; - if (rule->iifindex && (rule->iifindex != fl->flowi_iif)) + iifindex = READ_ONCE(rule->iifindex); + if (iifindex && (iifindex != fl->flowi_iif)) goto out; - if (rule->oifindex && (rule->oifindex != fl->flowi_oif)) + oifindex = READ_ONCE(rule->oifindex); + if (oifindex && (oifindex != fl->flowi_oif)) goto out; if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) @@ -1041,14 +1043,14 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (rule->iifname[0]) { if (nla_put_string(skb, FRA_IIFNAME, rule->iifname)) goto nla_put_failure; - if (rule->iifindex == -1) + if (READ_ONCE(rule->iifindex) == -1) frh->flags |= FIB_RULE_IIF_DETACHED; } if (rule->oifname[0]) { if (nla_put_string(skb, FRA_OIFNAME, rule->oifname)) goto nla_put_failure; - if (rule->oifindex == -1) + if (READ_ONCE(rule->oifindex) == -1) frh->flags |= FIB_RULE_OIF_DETACHED; } @@ -1220,10 +1222,10 @@ static void attach_rules(struct list_head *rules, struct net_device *dev) list_for_each_entry(rule, rules, list) { if (rule->iifindex == -1 && strcmp(dev->name, rule->iifname) == 0) - rule->iifindex = dev->ifindex; + WRITE_ONCE(rule->iifindex, dev->ifindex); if (rule->oifindex == -1 && strcmp(dev->name, rule->oifname) == 0) - rule->oifindex = dev->ifindex; + WRITE_ONCE(rule->oifindex, dev->ifindex); } } @@ -1233,9 +1235,9 @@ static void detach_rules(struct list_head *rules, struct net_device *dev) list_for_each_entry(rule, rules, list) { if (rule->iifindex == dev->ifindex) - rule->iifindex = -1; + WRITE_ONCE(rule->iifindex, -1); if (rule->oifindex == dev->ifindex) - rule->oifindex = -1; + WRITE_ONCE(rule->oifindex, -1); } } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 0e638a37aa09..9cd8de6bebb5 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -853,23 +853,30 @@ __skb_flow_dissect_ports(const struct sk_buff *skb, void *target_container, const void *data, int nhoff, u8 ip_proto, int hlen) { - enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX; - struct flow_dissector_key_ports *key_ports; + struct flow_dissector_key_ports_range *key_ports_range = NULL; + struct flow_dissector_key_ports *key_ports = NULL; + __be32 ports; if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) - dissector_ports = FLOW_DISSECTOR_KEY_PORTS; - else if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS_RANGE)) - dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE; + key_ports = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS, + target_container); + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE)) + key_ports_range = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE, + target_container); - if (dissector_ports == FLOW_DISSECTOR_KEY_MAX) + if (!key_ports && !key_ports_range) return; - key_ports = skb_flow_dissector_target(flow_dissector, - dissector_ports, - target_container); - key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, - data, hlen); + ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); + + if (key_ports) + key_ports->ports = ports; + + if (key_ports_range) + key_ports_range->tp.ports = ports; } static void @@ -924,6 +931,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector *flow_dissector, void *target_container) { + struct flow_dissector_key_ports_range *key_ports_range = NULL; struct flow_dissector_key_ports *key_ports = NULL; struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; @@ -968,20 +976,21 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; } - if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) { key_ports = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_PORTS, target_container); - else if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS_RANGE)) - key_ports = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_PORTS_RANGE, - target_container); - - if (key_ports) { key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE)) { + key_ports_range = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_PORTS_RANGE, + target_container); + key_ports_range->tp.src = flow_keys->sport; + key_ports_range->tp.dst = flow_keys->dport; + } if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_FLOW_LABEL)) { @@ -1108,10 +1117,12 @@ bool __skb_flow_dissect(const struct net *net, FLOW_DISSECTOR_KEY_BASIC, target_container); + rcu_read_lock(); + if (skb) { if (!net) { if (skb->dev) - net = dev_net(skb->dev); + net = dev_net_rcu(skb->dev); else if (skb->sk) net = sock_net(skb->sk); } @@ -1122,7 +1133,6 @@ bool __skb_flow_dissect(const struct net *net, enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR; struct bpf_prog_array *run_array; - rcu_read_lock(); run_array = rcu_dereference(init_net.bpf.run_array[type]); if (!run_array) run_array = rcu_dereference(net->bpf.run_array[type]); @@ -1150,17 +1160,17 @@ bool __skb_flow_dissect(const struct net *net, prog = READ_ONCE(run_array->items[0].prog); result = bpf_flow_dissect(prog, &ctx, n_proto, nhoff, hlen, flags); - if (result == BPF_FLOW_DISSECTOR_CONTINUE) - goto dissect_continue; - __skb_flow_bpf_to_target(&flow_keys, flow_dissector, - target_container); - rcu_read_unlock(); - return result == BPF_OK; + if (result != BPF_FLOW_DISSECTOR_CONTINUE) { + __skb_flow_bpf_to_target(&flow_keys, flow_dissector, + target_container); + rcu_read_unlock(); + return result == BPF_OK; + } } -dissect_continue: - rcu_read_unlock(); } + rcu_read_unlock(); + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct ethhdr *eth = eth_hdr(skb); diff --git a/net/core/gro.c b/net/core/gro.c index d1f44084e978..0ad549b07e03 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -7,9 +7,6 @@ #define MAX_GRO_SKBS 8 -/* This should be increased if a protocol with a bigger head is added. */ -#define GRO_MAX_HEAD (MAX_HEADER + 128) - static DEFINE_SPINLOCK(offload_lock); /** @@ -656,6 +653,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->pkt_type = PACKET_HOST; skb->encapsulation = 0; + skb->ip_summed = CHECKSUM_NONE; skb_shinfo(skb)->gso_type = 0; skb_shinfo(skb)->gso_size = 0; if (unlikely(skb->slow_gro)) { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 89656d180bc6..bd0251bd74a1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3447,10 +3447,12 @@ static const struct seq_operations neigh_stat_seq_ops = { static void __neigh_notify(struct neighbour *n, int type, int flags, u32 pid) { - struct net *net = dev_net(n->dev); struct sk_buff *skb; int err = -ENOBUFS; + struct net *net; + rcu_read_lock(); + net = dev_net_rcu(n->dev); skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; @@ -3463,9 +3465,11 @@ static void __neigh_notify(struct neighbour *n, int type, int flags, goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); - return; + goto out; errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); +out: + rcu_read_unlock(); } void neigh_app_ns(struct neighbour *n) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index cb39a12b2f82..4303f2a49262 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -464,7 +464,7 @@ static void net_complete_free(void) } -static void net_free(struct net *net) +void net_passive_dec(struct net *net) { if (refcount_dec_and_test(&net->passive)) { kfree(rcu_access_pointer(net->gen)); @@ -482,7 +482,7 @@ void net_drop_ns(void *p) struct net *net = (struct net *)p; if (net) - net_free(net); + net_passive_dec(net); } struct net *copy_net_ns(unsigned long flags, @@ -523,7 +523,7 @@ put_userns: key_remove_domain(net->key_domain); #endif put_user_ns(user_ns); - net_free(net); + net_passive_dec(net); dec_ucounts: dec_net_namespaces(ucounts); return ERR_PTR(rv); @@ -672,7 +672,7 @@ static void cleanup_net(struct work_struct *work) key_remove_domain(net->key_domain); #endif put_user_ns(net->user_ns); - net_free(net); + net_passive_dec(net); } cleanup_net_task = NULL; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1f4d4b5570ab..d1e559fce918 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3432,6 +3432,7 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, err = -ENODEV; rtnl_nets_unlock(&rtnl_nets); + rtnl_nets_destroy(&rtnl_nets); errout: return err; } diff --git a/net/core/scm.c b/net/core/scm.c index 4f6a14babe5a..733c0cbd393d 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -282,6 +282,16 @@ efault: } EXPORT_SYMBOL(put_cmsg); +int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len, + void *data) +{ + /* Don't produce truncated CMSGs */ + if (!msg->msg_control || msg->msg_controllen < CMSG_LEN(len)) + return -ETOOSMALL; + + return put_cmsg(msg, level, type, len, data); +} + void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss_internal) { struct scm_timestamping64 tss; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a441613a1e6c..b1c81687e9d8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -69,6 +69,7 @@ #include <net/dst.h> #include <net/sock.h> #include <net/checksum.h> +#include <net/gro.h> #include <net/gso.h> #include <net/hotdata.h> #include <net/ip6_checksum.h> @@ -95,7 +96,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; #endif -#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER) +#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN) +#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \ + GRO_MAX_HEAD_PAD)) /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique @@ -220,67 +223,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) #define NAPI_SKB_CACHE_BULK 16 #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2) -#if PAGE_SIZE == SZ_4K - -#define NAPI_HAS_SMALL_PAGE_FRAG 1 -#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc) - -/* specialized page frag allocator using a single order 0 page - * and slicing it into 1K sized fragment. Constrained to systems - * with a very limited amount of 1K fragments fitting a single - * page - to avoid excessive truesize underestimation - */ - -struct page_frag_1k { - void *va; - u16 offset; - bool pfmemalloc; -}; - -static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp) -{ - struct page *page; - int offset; - - offset = nc->offset - SZ_1K; - if (likely(offset >= 0)) - goto use_frag; - - page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); - if (!page) - return NULL; - - nc->va = page_address(page); - nc->pfmemalloc = page_is_pfmemalloc(page); - offset = PAGE_SIZE - SZ_1K; - page_ref_add(page, offset / SZ_1K); - -use_frag: - nc->offset = offset; - return nc->va + offset; -} -#else - -/* the small page is actually unused in this build; add dummy helpers - * to please the compiler and avoid later preprocessor's conditionals - */ -#define NAPI_HAS_SMALL_PAGE_FRAG 0 -#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false - -struct page_frag_1k { -}; - -static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask) -{ - return NULL; -} - -#endif - struct napi_alloc_cache { local_lock_t bh_lock; struct page_frag_cache page; - struct page_frag_1k page_small; unsigned int skb_count; void *skb_cache[NAPI_SKB_CACHE_SIZE]; }; @@ -290,23 +235,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; -/* Double check that napi_get_frags() allocates skbs with - * skb->head being backed by slab, not a page fragment. - * This is to make sure bug fixed in 3226b158e67c - * ("net: avoid 32 x truesize under-estimation for tiny skbs") - * does not accidentally come back. - */ -void napi_get_frags_check(struct napi_struct *napi) -{ - struct sk_buff *skb; - - local_bh_disable(); - skb = napi_get_frags(napi); - WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag); - napi_free_frags(napi); - local_bh_enable(); -} - void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -736,7 +664,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len, /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. */ - if (len <= SKB_WITH_OVERHEAD(1024) || + if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE); @@ -813,10 +741,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. - * When the small frag allocator is available, prefer it over kmalloc - * for small fragments */ - if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) || + if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, @@ -826,32 +752,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len) goto skb_success; } + len = SKB_HEAD_ALIGN(len); + if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; local_lock_nested_bh(&napi_alloc_cache.bh_lock); nc = this_cpu_ptr(&napi_alloc_cache); - if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) { - /* we are artificially inflating the allocation size, but - * that is not as bad as it may look like, as: - * - 'len' less than GRO_MAX_HEAD makes little sense - * - On most systems, larger 'len' values lead to fragment - * size above 512 bytes - * - kmalloc would use the kmalloc-1k slab for such values - * - Builds with smaller GRO_MAX_HEAD will very likely do - * little networking, as that implies no WiFi and no - * tunnels support, and 32 bits arches. - */ - len = SZ_1K; - data = page_frag_alloc_1k(&nc->page_small, gfp_mask); - pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small); - } else { - len = SKB_HEAD_ALIGN(len); - - data = page_frag_alloc(&nc->page, len, gfp_mask); - pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); - } + data = page_frag_alloc(&nc->page, len, gfp_mask); + pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!data)) @@ -6123,11 +6033,11 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->offload_fwd_mark = 0; skb->offload_l3_fwd_mark = 0; #endif + ipvs_reset(skb); if (!xnet) return; - ipvs_reset(skb); skb->mark = 0; skb_clear_tstamp(skb); } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 61f3f3d4e528..0ddc4c718833 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -549,6 +549,9 @@ static int sk_psock_skb_ingress_enqueue(struct sk_buff *skb, return num_sge; } +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) + psock->ingress_bytes += len; +#endif copied = len; msg->sg.start = 0; msg->sg.size = copied; @@ -1144,6 +1147,10 @@ int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock) if (!ret) sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED); + if (sk_is_tcp(sk)) { + psock->strp.cb.read_sock = tcp_bpf_strp_read_sock; + psock->copied_seq = tcp_sk(sk)->copied_seq; + } return ret; } diff --git a/net/core/sock.c b/net/core/sock.c index eae2ae70a2e0..6c0e87f97fa4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2246,6 +2246,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, get_net_track(net, &sk->ns_tracker, priority); sock_inuse_add(net, 1); } else { + net_passive_inc(net); __netns_tracker_alloc(net, &sk->ns_tracker, false, priority); } @@ -2270,6 +2271,7 @@ EXPORT_SYMBOL(sk_alloc); static void __sk_destruct(struct rcu_head *head) { struct sock *sk = container_of(head, struct sock, sk_rcu); + struct net *net = sock_net(sk); struct sk_filter *filter; if (sk->sk_destruct) @@ -2301,14 +2303,28 @@ static void __sk_destruct(struct rcu_head *head) put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); - if (likely(sk->sk_net_refcnt)) - put_net_track(sock_net(sk), &sk->ns_tracker); - else - __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); - + if (likely(sk->sk_net_refcnt)) { + put_net_track(net, &sk->ns_tracker); + } else { + __netns_tracker_free(net, &sk->ns_tracker, false); + net_passive_dec(net); + } sk_prot_free(sk->sk_prot_creator, sk); } +void sk_net_refcnt_upgrade(struct sock *sk) +{ + struct net *net = sock_net(sk); + + WARN_ON_ONCE(sk->sk_net_refcnt); + __netns_tracker_free(net, &sk->ns_tracker, false); + net_passive_dec(net); + sk->sk_net_refcnt = 1; + get_net_track(net, &sk->ns_tracker, GFP_KERNEL); + sock_inuse_add(net, 1); +} +EXPORT_SYMBOL_GPL(sk_net_refcnt_upgrade); + void sk_destruct(struct sock *sk) { bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE); @@ -2405,6 +2421,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) * is not properly dismantling its kernel sockets at netns * destroy time. */ + net_passive_inc(sock_net(newsk)); __netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker, false, priority); } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index f1b9b3958792..82a14f131d00 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -303,7 +303,10 @@ static int sock_map_link(struct bpf_map *map, struct sock *sk) write_lock_bh(&sk->sk_callback_lock); if (stream_parser && stream_verdict && !psock->saved_data_ready) { - ret = sk_psock_init_strp(sk, psock); + if (sk_is_tcp(sk)) + ret = sk_psock_init_strp(sk, psock); + else + ret = -EOPNOTSUPP; if (ret) { write_unlock_bh(&sk->sk_callback_lock); sk_psock_put(sk, psock); @@ -541,6 +544,9 @@ static bool sock_map_sk_state_allowed(const struct sock *sk) return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN); if (sk_is_stream_unix(sk)) return (1 << sk->sk_state) & TCPF_ESTABLISHED; + if (sk_is_vsock(sk) && + (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) + return (1 << sk->sk_state) & TCPF_ESTABLISHED; return true; } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index ad2741f1346a..c7769ee0d9c5 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -34,6 +34,7 @@ static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE; +static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ; static int net_msg_warn; /* Unused, but still a sysctl */ @@ -587,7 +588,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .extra1 = &netdev_budget_usecs_min, }, { .procname = "fb_tunnels_only_for_init_net", diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 2bd77c94f9f1..b97374b508f6 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -6,6 +6,7 @@ #include <linux/rtnetlink.h> #include <linux/ptp_clock_kernel.h> #include <linux/phy_link_topology.h> +#include <net/netdev_queues.h> #include "netlink.h" #include "common.h" @@ -462,6 +463,11 @@ const char ts_rx_filter_names[][ETH_GSTRING_LEN] = { }; static_assert(ARRAY_SIZE(ts_rx_filter_names) == __HWTSTAMP_FILTER_CNT); +const char ts_flags_names[][ETH_GSTRING_LEN] = { + [const_ilog2(HWTSTAMP_FLAG_BONDED_PHC_INDEX)] = "bonded-phc-index", +}; +static_assert(ARRAY_SIZE(ts_flags_names) == __HWTSTAMP_FLAG_CNT); + const char udp_tunnel_type_names[][ETH_GSTRING_LEN] = { [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN] = "vxlan", [ETHTOOL_UDP_TUNNEL_TYPE_GENEVE] = "geneve", @@ -766,6 +772,21 @@ int ethtool_check_ops(const struct ethtool_ops *ops) return 0; } +void ethtool_ringparam_get_cfg(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kparam, + struct netlink_ext_ack *extack) +{ + memset(param, 0, sizeof(*param)); + memset(kparam, 0, sizeof(*kparam)); + + param->cmd = ETHTOOL_GRINGPARAM; + dev->ethtool_ops->get_ringparam(dev, param, kparam, extack); + + /* Driver gives us current state, we want to return current config */ + kparam->tcp_data_split = dev->cfg->hds_config; +} + static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info) { memset(info, 0, sizeof(*info)); diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 850eadde4bfc..a1088c2441d0 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -13,6 +13,7 @@ ETHTOOL_LINK_MODE_ ## speed ## base ## type ## _ ## duplex ## _BIT #define __SOF_TIMESTAMPING_CNT (const_ilog2(SOF_TIMESTAMPING_LAST) + 1) +#define __HWTSTAMP_FLAG_CNT (const_ilog2(HWTSTAMP_FLAG_LAST) + 1) struct link_mode_info { int speed; @@ -38,6 +39,7 @@ extern const char wol_mode_names[][ETH_GSTRING_LEN]; extern const char sof_timestamping_names[][ETH_GSTRING_LEN]; extern const char ts_tx_type_names[][ETH_GSTRING_LEN]; extern const char ts_rx_filter_names[][ETH_GSTRING_LEN]; +extern const char ts_flags_names[][ETH_GSTRING_LEN]; extern const char udp_tunnel_type_names[][ETH_GSTRING_LEN]; int __ethtool_get_link(struct net_device *dev); @@ -49,6 +51,12 @@ int ethtool_check_max_channel(struct net_device *dev, struct ethtool_channels channels, struct genl_info *info); int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context); + +void ethtool_ringparam_get_cfg(struct net_device *dev, + struct ethtool_ringparam *param, + struct kernel_ethtool_ringparam *kparam, + struct netlink_ext_ack *extack); + int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); int ethtool_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 34bee42e1247..1c3ba2247776 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -993,7 +993,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS && + if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds && ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; @@ -2059,8 +2059,8 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) { - struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; struct kernel_ethtool_ringparam kernel_ringparam; + struct ethtool_ringparam ringparam, max; int ret; if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) @@ -2069,7 +2069,7 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) return -EFAULT; - dev->ethtool_ops->get_ringparam(dev, &max, &kernel_ringparam, NULL); + ethtool_ringparam_get_cfg(dev, &max, &kernel_ringparam, NULL); /* ensure new ring parameters are within the maximums */ if (ringparam.rx_pending > max.rx_max_pending || diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 7839bfd1ac6a..aeedd5ec6b8c 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -215,17 +215,16 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info, static int ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) { - struct kernel_ethtool_ringparam kernel_ringparam = {}; - struct ethtool_ringparam ringparam = {}; + struct kernel_ethtool_ringparam kernel_ringparam; struct net_device *dev = req_info->dev; + struct ethtool_ringparam ringparam; struct nlattr **tb = info->attrs; const struct nlattr *err_attr; bool mod = false; int ret; - dev->ethtool_ops->get_ringparam(dev, &ringparam, - &kernel_ringparam, info->extack); - kernel_ringparam.tcp_data_split = dev->cfg->hds_config; + ethtool_ringparam_get_cfg(dev, &ringparam, &kernel_ringparam, + info->extack); ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 7cb106b590ab..58df9ad02ce8 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, u32 total_size, indir_bytes; u8 *rss_config; + data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key; + ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context); if (!ctx) return -ENOENT; @@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base, if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context) return -EOPNOTSUPP; - data->no_key_fields = !ops->rxfh_per_ctx_key; return rss_prepare_ctx(request, dev, data, info); } diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 818cf01f0911..6b76c05caba4 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -75,6 +75,11 @@ static const struct strset_info info_template[] = { .count = __HWTSTAMP_FILTER_CNT, .strings = ts_rx_filter_names, }, + [ETH_SS_TS_FLAGS] = { + .per_dev = false, + .count = __HWTSTAMP_FLAG_CNT, + .strings = ts_flags_names, + }, [ETH_SS_UDP_TUNNEL_TYPES] = { .per_dev = false, .count = __ETHTOOL_UDP_TUNNEL_TYPE_CNT, diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c index 9188e088fb2f..2be356bdfe87 100644 --- a/net/ethtool/tsconfig.c +++ b/net/ethtool/tsconfig.c @@ -54,7 +54,7 @@ static int tsconfig_prepare_data(const struct ethnl_req_info *req_base, data->hwtst_config.tx_type = BIT(cfg.tx_type); data->hwtst_config.rx_filter = BIT(cfg.rx_filter); - data->hwtst_config.flags = BIT(cfg.flags); + data->hwtst_config.flags = cfg.flags; data->hwprov_desc.index = -1; hwprov = rtnl_dereference(dev->hwprov); @@ -91,10 +91,16 @@ static int tsconfig_reply_size(const struct ethnl_req_info *req_base, BUILD_BUG_ON(__HWTSTAMP_TX_CNT > 32); BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT > 32); + BUILD_BUG_ON(__HWTSTAMP_FLAG_CNT > 32); - if (data->hwtst_config.flags) - /* _TSCONFIG_HWTSTAMP_FLAGS */ - len += nla_total_size(sizeof(u32)); + if (data->hwtst_config.flags) { + ret = ethnl_bitset32_size(&data->hwtst_config.flags, + NULL, __HWTSTAMP_FLAG_CNT, + ts_flags_names, compact); + if (ret < 0) + return ret; + len += ret; /* _TSCONFIG_HWTSTAMP_FLAGS */ + } if (data->hwtst_config.tx_type) { ret = ethnl_bitset32_size(&data->hwtst_config.tx_type, @@ -130,8 +136,10 @@ static int tsconfig_fill_reply(struct sk_buff *skb, int ret; if (data->hwtst_config.flags) { - ret = nla_put_u32(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, - data->hwtst_config.flags); + ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, + &data->hwtst_config.flags, NULL, + __HWTSTAMP_FLAG_CNT, + ts_flags_names, compact); if (ret < 0) return ret; } @@ -180,7 +188,7 @@ const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1] = [ETHTOOL_A_TSCONFIG_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER] = NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), - [ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS] = { .type = NLA_U32 }, + [ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS] = { .type = NLA_NESTED }, [ETHTOOL_A_TSCONFIG_RX_FILTERS] = { .type = NLA_NESTED }, [ETHTOOL_A_TSCONFIG_TX_TYPES] = { .type = NLA_NESTED }, }; @@ -296,6 +304,7 @@ static int ethnl_set_tsconfig(struct ethnl_req_info *req_base, BUILD_BUG_ON(__HWTSTAMP_TX_CNT >= 32); BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT >= 32); + BUILD_BUG_ON(__HWTSTAMP_FLAG_CNT > 32); if (!netif_device_present(dev)) return -ENODEV; @@ -377,9 +386,13 @@ static int ethnl_set_tsconfig(struct ethnl_req_info *req_base, } if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS]) { - ethnl_update_u32(&hwtst_config.flags, - tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS], - &config_mod); + ret = ethnl_update_bitset32(&hwtst_config.flags, + __HWTSTAMP_FLAG_CNT, + tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS], + ts_flags_names, info->extack, + &config_mod); + if (ret < 0) + goto err_free_hwprov; } ret = net_hwtstamp_validate(&hwtst_config); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index cb9a7ed8abd3..814300eee39d 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -659,10 +659,12 @@ static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb */ void arp_xmit(struct sk_buff *skb) { + rcu_read_lock(); /* Send it off, maybe filter it using firewalling first. */ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, - dev_net(skb->dev), NULL, skb, NULL, skb->dev, + dev_net_rcu(skb->dev), NULL, skb, NULL, skb->dev, arp_xmit_finish); + rcu_read_unlock(); } EXPORT_SYMBOL(arp_xmit); @@ -1075,7 +1077,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r, __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; if (!dev && (r->arp_flags & ATF_COM)) { - dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family, + dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, r->arp_ha.sa_data); if (!dev) return -ENODEV; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c8b3cf5fba4c..55b8151759bc 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1371,10 +1371,11 @@ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope) __be32 addr = 0; unsigned char localnet_scope = RT_SCOPE_HOST; struct in_device *in_dev; - struct net *net = dev_net(dev); + struct net *net; int master_idx; rcu_read_lock(); + net = dev_net_rcu(dev); in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto no_in_dev; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 094084b61bff..5482edb5aade 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -399,10 +399,10 @@ static void icmp_push_reply(struct sock *sk, static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) { - struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->dst.dev); + struct net *net = dev_net_rcu(rt->dst.dev); bool apply_ratelimit = false; + struct ipcm_cookie ipc; struct flowi4 fl4; struct sock *sk; struct inet_sock *inet; @@ -608,12 +608,14 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, struct sock *sk; if (!rt) - goto out; + return; + + rcu_read_lock(); if (rt->dst.dev) - net = dev_net(rt->dst.dev); + net = dev_net_rcu(rt->dst.dev); else if (skb_in->dev) - net = dev_net(skb_in->dev); + net = dev_net_rcu(skb_in->dev); else goto out; @@ -785,7 +787,8 @@ out_unlock: icmp_xmit_unlock(sk); out_bh_enable: local_bh_enable(); -out:; +out: + rcu_read_unlock(); } EXPORT_SYMBOL(__icmp_send); @@ -834,7 +837,7 @@ static void icmp_socket_deliver(struct sk_buff *skb, u32 info) * avoid additional coding at protocol handlers. */ if (!pskb_may_pull(skb, iph->ihl * 4 + 8)) { - __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS); return; } @@ -868,7 +871,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb) struct net *net; u32 info = 0; - net = dev_net(skb_dst(skb)->dev); + net = dev_net_rcu(skb_dst(skb)->dev); /* * Incomplete header ? @@ -979,7 +982,7 @@ out_err: static enum skb_drop_reason icmp_redirect(struct sk_buff *skb) { if (skb->len < sizeof(struct iphdr)) { - __ICMP_INC_STATS(dev_net(skb->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net_rcu(skb->dev), ICMP_MIB_INERRORS); return SKB_DROP_REASON_PKT_TOO_SMALL; } @@ -1011,7 +1014,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) struct icmp_bxm icmp_param; struct net *net; - net = dev_net(skb_dst(skb)->dev); + net = dev_net_rcu(skb_dst(skb)->dev); /* should there be an ICMP stat for ignored echos? */ if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all)) return SKB_NOT_DROPPED_YET; @@ -1040,9 +1043,9 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb) bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) { + struct net *net = dev_net_rcu(skb->dev); struct icmp_ext_hdr *ext_hdr, _ext_hdr; struct icmp_ext_echo_iio *iio, _iio; - struct net *net = dev_net(skb->dev); struct inet6_dev *in6_dev; struct in_device *in_dev; struct net_device *dev; @@ -1181,7 +1184,7 @@ static enum skb_drop_reason icmp_timestamp(struct sk_buff *skb) return SKB_NOT_DROPPED_YET; out_err: - __ICMP_INC_STATS(dev_net(skb_dst(skb)->dev), ICMP_MIB_INERRORS); + __ICMP_INC_STATS(dev_net_rcu(skb_dst(skb)->dev), ICMP_MIB_INERRORS); return SKB_DROP_REASON_PKT_TOO_SMALL; } @@ -1198,7 +1201,7 @@ int icmp_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct rtable *rt = skb_rtable(skb); - struct net *net = dev_net(rt->dst.dev); + struct net *net = dev_net_rcu(rt->dst.dev); struct icmphdr *icmph; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { @@ -1371,9 +1374,9 @@ int icmp_err(struct sk_buff *skb, u32 info) struct iphdr *iph = (struct iphdr *)skb->data; int offset = iph->ihl<<2; struct icmphdr *icmph = (struct icmphdr *)(skb->data + offset); + struct net *net = dev_net_rcu(skb->dev); int type = icmp_hdr(skb)->type; int code = icmp_hdr(skb)->code; - struct net *net = dev_net(skb->dev); /* * Use ping_err to handle all icmp errors except those diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 577b88a43293..753704f75b2c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -390,7 +390,13 @@ static inline int ip_rt_proc_init(void) static inline bool rt_is_expired(const struct rtable *rth) { - return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev)); + bool res; + + rcu_read_lock(); + res = rth->rt_genid != rt_genid_ipv4(dev_net_rcu(rth->dst.dev)); + rcu_read_unlock(); + + return res; } void rt_cache_flush(struct net *net) @@ -1002,9 +1008,9 @@ out: kfree_skb_reason(skb, reason); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; - struct net *net = dev_net(dst->dev); struct fib_result res; bool lock = false; + struct net *net; u32 old_mtu; if (ip_mtu_locked(dst)) @@ -1014,6 +1020,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (old_mtu < mtu) return; + rcu_read_lock(); + net = dev_net_rcu(dst->dev); if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); @@ -1021,9 +1029,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) - return; + goto out; - rcu_read_lock(); if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc; @@ -1037,14 +1044,14 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); } - rcu_read_unlock(); - return; + goto out; } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); } +out: rcu_read_unlock(); } @@ -1307,10 +1314,15 @@ static void set_class_tag(struct rtable *rt, u32 tag) static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { - struct net *net = dev_net(dst->dev); unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); - unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, - net->ipv4.ip_rt_min_advmss); + unsigned int advmss; + struct net *net; + + rcu_read_lock(); + net = dev_net_rcu(dst->dev); + advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, + net->ipv4.ip_rt_min_advmss); + rcu_read_unlock(); return min(advmss, IPV4_MAX_PMTU - header_size); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0d704bda6c41..57df7c1d2faa 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1565,12 +1565,13 @@ EXPORT_SYMBOL(tcp_recv_skb); * or for 'peeking' the socket using this routine * (although both would be easy to implement). */ -int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, - sk_read_actor_t recv_actor) +static int __tcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack, + u32 *copied_seq) { struct sk_buff *skb; struct tcp_sock *tp = tcp_sk(sk); - u32 seq = tp->copied_seq; + u32 seq = *copied_seq; u32 offset; int copied = 0; @@ -1624,9 +1625,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_eat_recv_skb(sk, skb); if (!desc->count) break; - WRITE_ONCE(tp->copied_seq, seq); + WRITE_ONCE(*copied_seq, seq); } - WRITE_ONCE(tp->copied_seq, seq); + WRITE_ONCE(*copied_seq, seq); + + if (noack) + goto out; tcp_rcv_space_adjust(sk); @@ -1635,10 +1639,25 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, tcp_recv_skb(sk, seq, &offset); tcp_cleanup_rbuf(sk, copied); } +out: return copied; } + +int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ + return __tcp_read_sock(sk, desc, recv_actor, false, + &tcp_sk(sk)->copied_seq); +} EXPORT_SYMBOL(tcp_read_sock); +int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack, + u32 *copied_seq) +{ + return __tcp_read_sock(sk, desc, recv_actor, noack, copied_seq); +} + int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { struct sk_buff *skb; @@ -2438,14 +2457,12 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, */ memset(&dmabuf_cmsg, 0, sizeof(dmabuf_cmsg)); dmabuf_cmsg.frag_size = copy; - err = put_cmsg(msg, SOL_SOCKET, SO_DEVMEM_LINEAR, - sizeof(dmabuf_cmsg), &dmabuf_cmsg); - if (err || msg->msg_flags & MSG_CTRUNC) { - msg->msg_flags &= ~MSG_CTRUNC; - if (!err) - err = -ETOOSMALL; + err = put_cmsg_notrunc(msg, SOL_SOCKET, + SO_DEVMEM_LINEAR, + sizeof(dmabuf_cmsg), + &dmabuf_cmsg); + if (err) goto out; - } sent += copy; @@ -2499,16 +2516,12 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, offset += copy; remaining_len -= copy; - err = put_cmsg(msg, SOL_SOCKET, - SO_DEVMEM_DMABUF, - sizeof(dmabuf_cmsg), - &dmabuf_cmsg); - if (err || msg->msg_flags & MSG_CTRUNC) { - msg->msg_flags &= ~MSG_CTRUNC; - if (!err) - err = -ETOOSMALL; + err = put_cmsg_notrunc(msg, SOL_SOCKET, + SO_DEVMEM_DMABUF, + sizeof(dmabuf_cmsg), + &dmabuf_cmsg); + if (err) goto out; - } atomic_long_inc(&niov->pp_ref_count); tcp_xa_pool.netmems[tcp_xa_pool.idx++] = skb_frag_netmem(frag); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 47f65b1b70ca..ba581785adb4 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -646,6 +646,42 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) ops->sendmsg == tcp_sendmsg ? 0 : -ENOTSUPP; } +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) +int tcp_bpf_strp_read_sock(struct strparser *strp, read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ + struct sock *sk = strp->sk; + struct sk_psock *psock; + struct tcp_sock *tp; + int copied = 0; + + tp = tcp_sk(sk); + rcu_read_lock(); + psock = sk_psock(sk); + if (WARN_ON_ONCE(!psock)) { + desc->error = -EINVAL; + goto out; + } + + psock->ingress_bytes = 0; + copied = tcp_read_sock_noack(sk, desc, recv_actor, true, + &psock->copied_seq); + if (copied < 0) + goto out; + /* recv_actor may redirect skb to another socket (SK_REDIRECT) or + * just put skb into ingress queue of current socket (SK_PASS). + * For SK_REDIRECT, we need to ack the frame immediately but for + * SK_PASS, we want to delay the ack until tcp_bpf_recvmsg_parser(). + */ + tp->copied_seq = psock->copied_seq - psock->ingress_bytes; + tcp_rcv_space_adjust(sk); + __tcp_cleanup_rbuf(sk, copied - psock->ingress_bytes); +out: + rcu_read_unlock(); + return copied; +} +#endif /* CONFIG_BPF_STREAM_PARSER */ + int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 0f523cbfe329..32b28fc21b63 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -178,7 +178,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) if (!skb) return; - skb_dst_drop(skb); + tcp_cleanup_skb(skb); /* segs_in has been initialized to 1 in tcp_create_openreq_child(). * Hence, reset segs_in to 0 before calling tcp_segs_in() * to avoid double counting. Also, tcp_segs_in() expects @@ -195,7 +195,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN; tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); tp->syn_data_acked = 1; /* u64_stats_update_begin(&tp->syncp) not needed here, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb82e01da911..0cbf81bf3d45 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -243,9 +243,15 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) do_div(val, skb->truesize); tcp_sk(sk)->scaling_ratio = val ? val : 1; - if (old_ratio != tcp_sk(sk)->scaling_ratio) - WRITE_ONCE(tcp_sk(sk)->window_clamp, - tcp_win_from_space(sk, sk->sk_rcvbuf)); + if (old_ratio != tcp_sk(sk)->scaling_ratio) { + struct tcp_sock *tp = tcp_sk(sk); + + val = tcp_win_from_space(sk, sk->sk_rcvbuf); + tcp_set_window_clamp(sk, val); + + if (tp->window_clamp < tp->rcvq_space.space) + tp->rcvq_space.space = tp->window_clamp; + } } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); @@ -4970,7 +4976,7 @@ static void tcp_ofo_queue(struct sock *sk) tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); else kfree_skb_partial(skb, fragstolen); @@ -5162,7 +5168,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, skb, fragstolen)) ? 1 : 0; tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { - __skb_queue_tail(&sk->sk_receive_queue, skb); + tcp_add_receive_queue(sk, skb); skb_set_owner_r(skb, sk); } return eaten; @@ -5245,7 +5251,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) __kfree_skb(skb); return; } - skb_dst_drop(skb); + tcp_cleanup_skb(skb); __skb_pull(skb, tcp_hdr(skb)->doff * 4); reason = SKB_DROP_REASON_NOT_SPECIFIED; @@ -6226,7 +6232,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); /* Bulk data transfer: receiver */ - skb_dst_drop(skb); + tcp_cleanup_skb(skb); __skb_pull(skb, tcp_header_len); eaten = tcp_queue_rcv(sk, skb, &fragstolen); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index cc2b5194a18d..2632844d2c35 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2027,7 +2027,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, */ skb_condense(skb); - skb_dst_drop(skb); + tcp_cleanup_skb(skb); if (unlikely(tcp_checksum_complete(skb))) { bh_unlock_sock(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index b089b08e9617..dfdb7a4608a8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -815,12 +815,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* In sequence, PAWS is OK. */ - /* TODO: We probably should defer ts_recent change once - * we take ownership of @req. - */ - if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) - WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval); - if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { /* Truncate SYN, it is out of window starting at tcp_rsk(req)->rcv_isn + 1. */ @@ -869,6 +863,10 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!child) goto listen_overflow; + if (own_req && tmp_opt.saw_tstamp && + !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) + tcp_sk(child)->rx_opt.ts_recent = tmp_opt.rcv_tsval; + if (own_req && rsk_drop_req(req)) { reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req); inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c472c9a57cf6..a9bb9ce5438e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,9 +1141,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a6984a29fdb9..4d14ab7f7e99 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -76,7 +76,7 @@ static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */ struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset); - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); if (type == ICMPV6_PKT_TOOBIG) ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); @@ -473,7 +473,10 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (!skb->dev) return; - net = dev_net(skb->dev); + + rcu_read_lock(); + + net = dev_net_rcu(skb->dev); mark = IP6_REPLY_MARK(net, skb->mark); /* * Make sure we respect the rules @@ -496,7 +499,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, !(type == ICMPV6_PARAMPROB && code == ICMPV6_UNK_OPTION && (opt_unrec(skb, info)))) - return; + goto out; saddr = NULL; } @@ -526,7 +529,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) { net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); - return; + goto out; } /* @@ -535,7 +538,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (is_ineligible(skb)) { net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n", &hdr->saddr, &hdr->daddr); - return; + goto out; } /* Needed by both icmpv6_global_allow and icmpv6_xmit_lock */ @@ -582,7 +585,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6, apply_ratelimit)) - goto out; + goto out_unlock; tmp_hdr.icmp6_type = type; tmp_hdr.icmp6_code = code; @@ -600,7 +603,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, dst = icmpv6_route_lookup(net, skb, sk, &fl6); if (IS_ERR(dst)) - goto out; + goto out_unlock; ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); @@ -616,7 +619,6 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, goto out_dst_release; } - rcu_read_lock(); idev = __in6_dev_get(skb->dev); if (ip6_append_data(sk, icmpv6_getfrag, &msg, @@ -630,13 +632,15 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, len + sizeof(struct icmp6hdr)); } - rcu_read_unlock(); + out_dst_release: dst_release(dst); -out: +out_unlock: icmpv6_xmit_unlock(sk); out_bh_enable: local_bh_enable(); +out: + rcu_read_unlock(); } EXPORT_SYMBOL(icmp6_send); @@ -679,8 +683,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, skb_pull(skb2, nhs); skb_reset_network_header(skb2); - rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, - skb, 0); + rt = rt6_lookup(dev_net_rcu(skb->dev), &ipv6_hdr(skb2)->saddr, + NULL, 0, skb, 0); if (rt && rt->dst.dev) skb2->dev = rt->dst.dev; @@ -717,7 +721,7 @@ EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach); static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); struct sock *sk; struct inet6_dev *idev; struct ipv6_pinfo *np; @@ -832,7 +836,7 @@ enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info) { struct inet6_skb_parm *opt = IP6CB(skb); - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); const struct inet6_protocol *ipprot; enum skb_drop_reason reason; int inner_offset; @@ -889,7 +893,7 @@ out: static int icmpv6_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); struct net_device *dev = icmp6_dev(skb); struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; @@ -921,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS); + __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -939,7 +943,7 @@ static int icmpv6_rcv(struct sk_buff *skb) type = hdr->icmp6_type; - ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type); + ICMP6MSGIN_INC_STATS(dev_net_rcu(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: @@ -1034,9 +1038,9 @@ static int icmpv6_rcv(struct sk_buff *skb) csum_error: reason = SKB_DROP_REASON_ICMP_CSUM; - __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS); + __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_CSUMERRORS); discard_it: - __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS); + __ICMP6_INC_STATS(dev_net_rcu(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb_reason(skb, reason); return 0; diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 28e5a89dc255..2c383c12a431 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -336,7 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb), *cache_dst; + struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; @@ -407,13 +407,15 @@ do_encap: cache_dst = ip6_route_output(net, NULL, &fl6); if (cache_dst->error) { err = cache_dst->error; - dst_release(cache_dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (dst->lwtstate != cache_dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); if (unlikely(err)) @@ -426,8 +428,10 @@ do_encap: return dst_output(net, sk, skb); } out: + dst_release(cache_dst); return dst->lwtstate->orig_output(net, sk, skb); drop: + dst_release(cache_dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 70c0e16c0ae6..39da6a7ce5f1 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -477,9 +477,7 @@ discard: static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { skb_clear_delivery_time(skb); - rcu_read_lock(); ip6_protocol_deliver_rcu(net, skb, 0, false); - rcu_read_unlock(); return 0; } @@ -487,9 +485,15 @@ static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *sk int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, - dev_net(skb->dev), NULL, skb, skb->dev, NULL, - ip6_input_finish); + int res; + + rcu_read_lock(); + res = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, + dev_net_rcu(skb->dev), NULL, skb, skb->dev, NULL, + ip6_input_finish); + rcu_read_unlock(); + + return res; } EXPORT_SYMBOL_GPL(ip6_input); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9dfdb40988b0..65831b4fee1f 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1773,21 +1773,19 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) struct net_device *dev = idev->dev; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct net *net = dev_net(dev); const struct in6_addr *saddr; struct in6_addr addr_buf; struct mld2_report *pmr; struct sk_buff *skb; unsigned int size; struct sock *sk; - int err; + struct net *net; - sk = net->ipv6.igmp_sk; /* we assume size > sizeof(ra) here * Also try to not allocate high-order pages for big MTU */ size = min_t(int, mtu, PAGE_SIZE / 2) + hlen + tlen; - skb = sock_alloc_send_skb(sk, size, 1, &err); + skb = alloc_skb(size, GFP_KERNEL); if (!skb) return NULL; @@ -1795,6 +1793,12 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) skb_reserve(skb, hlen); skb_tailroom_reserve(skb, mtu, tlen); + rcu_read_lock(); + + net = dev_net_rcu(dev); + sk = net->ipv6.igmp_sk; + skb_set_owner_w(skb, sk); + if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { /* <draft-ietf-magma-mld-source-05.txt>: * use unspecified address as the source address @@ -1806,6 +1810,8 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu) ip6_mc_hdr(sk, skb, dev, saddr, &mld2_all_mcr, NEXTHDR_HOP, 0); + rcu_read_unlock(); + skb_put_data(skb, ra, sizeof(ra)); skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data); @@ -2165,21 +2171,21 @@ static void mld_send_cr(struct inet6_dev *idev) static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) { - struct net *net = dev_net(dev); - struct sock *sk = net->ipv6.igmp_sk; + const struct in6_addr *snd_addr, *saddr; + int err, len, payload_len, full_len; + struct in6_addr addr_buf; struct inet6_dev *idev; struct sk_buff *skb; struct mld_msg *hdr; - const struct in6_addr *snd_addr, *saddr; - struct in6_addr addr_buf; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - int err, len, payload_len, full_len; u8 ra[8] = { IPPROTO_ICMPV6, 0, IPV6_TLV_ROUTERALERT, 2, 0, 0, IPV6_TLV_PADN, 0 }; - struct flowi6 fl6; struct dst_entry *dst; + struct flowi6 fl6; + struct net *net; + struct sock *sk; if (type == ICMPV6_MGM_REDUCTION) snd_addr = &in6addr_linklocal_allrouters; @@ -2190,19 +2196,21 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) payload_len = len + sizeof(ra); full_len = sizeof(struct ipv6hdr) + payload_len; - rcu_read_lock(); - IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); - rcu_read_unlock(); + skb = alloc_skb(hlen + tlen + full_len, GFP_KERNEL); - skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); + rcu_read_lock(); + net = dev_net_rcu(dev); + idev = __in6_dev_get(dev); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); if (!skb) { - rcu_read_lock(); - IP6_INC_STATS(net, __in6_dev_get(dev), - IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return; } + sk = net->ipv6.igmp_sk; + skb_set_owner_w(skb, sk); + skb->priority = TC_PRIO_CONTROL; skb_reserve(skb, hlen); @@ -2227,9 +2235,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) IPPROTO_ICMPV6, csum_partial(hdr, len, 0)); - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - icmpv6_flow_init(sk, &fl6, type, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->dev->ifindex); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index d044c67019de..8699d1a188dc 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -418,15 +418,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; - struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); - if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", - __func__); + if (!skb) return NULL; - } skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -437,7 +433,9 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, /* Manually assign socket ownership as we avoid calling * sock_alloc_send_pskb() to bypass wmem buffer limits */ - skb_set_owner_w(skb, sk); + rcu_read_lock(); + skb_set_owner_w(skb, dev_net_rcu(dev)->ipv6.ndisc_sk); + rcu_read_unlock(); return skb; } @@ -473,16 +471,20 @@ static void ip6_nd_hdr(struct sk_buff *skb, void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct icmp6hdr *icmp6h = icmp6_hdr(skb); struct dst_entry *dst = skb_dst(skb); - struct net *net = dev_net(skb->dev); - struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; + struct net *net; + struct sock *sk; int err; - struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; + rcu_read_lock(); + + net = dev_net_rcu(skb->dev); + sk = net->ipv6.ndisc_sk; if (!dst) { struct flowi6 fl6; int oif = skb->dev->ifindex; @@ -490,6 +492,7 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { + rcu_read_unlock(); kfree_skb(skb); return; } @@ -504,7 +507,6 @@ void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); - rcu_read_lock(); idev = __in6_dev_get(dst->dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); @@ -1694,7 +1696,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) bool ret; if (netif_is_l3_master(skb->dev)) { - dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); + dev = dev_get_by_index_rcu(dev_net(skb->dev), IPCB(skb)->iif); if (!dev) return; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 78362822b907..ef2d23a1e3d5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3196,13 +3196,18 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; unsigned int mtu = dst_mtu(dst); - struct net *net = dev_net(dev); + struct net *net; mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + rcu_read_lock(); + + net = dev_net_rcu(dev); if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) mtu = net->ipv6.sysctl.ip6_rt_min_advmss; + rcu_read_unlock(); + /* * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 7ba22d2f2bfe..7c05ac846646 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -232,13 +232,15 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -251,6 +253,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } @@ -259,23 +262,35 @@ static int rpl_input(struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; + struct lwtunnel_state *lwtst; struct rpl_lwt *rlwt; int err; - rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); + /* We cannot dereference "orig_dst" once ip6_route_input() or + * skb_dst_drop() is called. However, in order to detect a dst loop, we + * need the address of its lwtstate. So, save the address of lwtstate + * now and use it later as a comparison. + */ + lwtst = orig_dst->lwtstate; + + rlwt = rpl_lwt_lwtunnel(lwtst); local_bh_disable(); dst = dst_cache_get(&rlwt->cache); local_bh_enable(); err = rpl_do_srh(skb, rlwt, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); - if (!dst->error) { + + /* cache only if we don't create a dst reference loop */ + if (!dst->error && lwtst != dst->lwtstate) { local_bh_disable(); dst_cache_set_ip6(&rlwt->cache, dst, &ipv6_hdr(skb)->saddr); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 4bf937bfc263..51583461ae29 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -472,23 +472,35 @@ static int seg6_input_core(struct net *net, struct sock *sk, { struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; + struct lwtunnel_state *lwtst; struct seg6_lwt *slwt; int err; - slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); + /* We cannot dereference "orig_dst" once ip6_route_input() or + * skb_dst_drop() is called. However, in order to detect a dst loop, we + * need the address of its lwtstate. So, save the address of lwtstate + * now and use it later as a comparison. + */ + lwtst = orig_dst->lwtstate; + + slwt = seg6_lwt_lwtunnel(lwtst); local_bh_disable(); dst = dst_cache_get(&slwt->cache); local_bh_enable(); err = seg6_do_srh(skb, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); - if (!dst->error) { + + /* cache only if we don't create a dst reference loop */ + if (!dst->error && lwtst != dst->lwtstate) { local_bh_disable(); dst_cache_set_ip6(&slwt->cache, dst, &ipv6_hdr(skb)->saddr); @@ -571,13 +583,15 @@ static int seg6_output_core(struct net *net, struct sock *sk, dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -593,6 +607,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6671daa67f4f..c6ea438b5c75 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1389,9 +1389,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 572d160edca3..c0e47f4f7b1a 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1514,11 +1514,6 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, if (mptcp_pm_is_userspace(msk)) goto next; - if (list_empty(&msk->conn_list)) { - mptcp_pm_remove_anno_addr(msk, addr, false); - goto next; - } - lock_sock(sk); remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index f6a207958459..ad21925af061 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1199,6 +1199,8 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk) pr_debug("TCP fallback already done (msk=%p)\n", msk); return; } + if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback))) + return; set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index fd021cf8286e..9f18217dddc8 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1142,7 +1142,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (data_len == 0) { pr_debug("infinite mapping received\n"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); - subflow->map_data_len = 0; return MAPPING_INVALID; } @@ -1286,18 +1285,6 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss mptcp_schedule_work(sk); } -static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) -{ - struct mptcp_sock *msk = mptcp_sk(subflow->conn); - - if (subflow->mp_join) - return false; - else if (READ_ONCE(msk->csum_enabled)) - return !subflow->valid_csum_seen; - else - return READ_ONCE(msk->allow_infinite_fallback); -} - static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -1393,7 +1380,7 @@ fallback: return true; } - if (!subflow_can_fallback(subflow) && subflow->map_data_len) { + if (!READ_ONCE(msk->allow_infinite_fallback)) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ @@ -1772,10 +1759,7 @@ int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, * needs it. * Update ns_tracker to current stack trace and refcounted tracker. */ - __netns_tracker_free(net, &sf->sk->ns_tracker, false); - sf->sk->sk_net_refcnt = 1; - get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sf->sk); err = tcp_set_ulp(sf->sk, "mptcp"); if (err) goto err_free; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 97c6eb8847a0..8cd4cf7ae211 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -381,10 +381,8 @@ static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx, flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; - if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) { - flow_offload_teardown(flow); + if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) return 0; - } iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset); thoff = (iph->ihl * 4) + ctx->offset; @@ -662,10 +660,8 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx, flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset; - if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) { - flow_offload_teardown(flow); + if (unlikely(nf_flow_exceeds_mtu(skb, mtu))) return 0; - } ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset); thoff = sizeof(*ip6h) + ctx->offset; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 85311226183a..a53ea60d0a78 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -795,16 +795,6 @@ static int netlink_release(struct socket *sock) sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); - /* Because struct net might disappear soon, do not keep a pointer. */ - if (!sk->sk_net_refcnt && sock_net(sk) != &init_net) { - __netns_tracker_free(sock_net(sk), &sk->ns_tracker, false); - /* Because of deferred_put_nlk_sk and use of work queue, - * it is possible netns will be freed before this socket. - */ - sock_net_set(sk, &init_net); - __netns_tracker_alloc(&init_net, &sk->ns_tracker, - false, GFP_KERNEL); - } call_rcu(&nlk->rcu, deferred_put_nlk_sk); return 0; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 225f6048867f..5d548eda742d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2101,6 +2101,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; + struct net *net_vport; int err; ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, @@ -2117,12 +2118,15 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex)) goto nla_put_failure; - if (!net_eq(net, dev_net(vport->dev))) { - int id = peernet2id_alloc(net, dev_net(vport->dev), gfp); + rcu_read_lock(); + net_vport = dev_net_rcu(vport->dev); + if (!net_eq(net, net_vport)) { + int id = peernet2id_alloc(net, net_vport, GFP_ATOMIC); if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) - goto nla_put_failure; + goto nla_put_failure_unlock; } + rcu_read_unlock(); ovs_vport_get_stats(vport, &vport_stats); if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, @@ -2143,6 +2147,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, genlmsg_end(skb, ovs_header); return 0; +nla_put_failure_unlock: + rcu_read_unlock(); nla_put_failure: err = -EMSGSIZE; error: diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 0581c53e6517..3cc2f303bf78 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -504,12 +504,8 @@ bool rds_tcp_tune(struct socket *sock) release_sock(sk); return false; } - /* Update ns_tracker to current stack trace and refcounted tracker */ - __netns_tracker_free(net, &sk->ns_tracker, false); - - sk->sk_net_refcnt = 1; - netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sk); + put_net(net); } rtn = net_generic(net, rds_tcp_netid); if (rtn->sndbuf_size > 0) { diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 72c65d938a15..a4a668b88a8f 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net_device *dev; ax25_address *source; ax25_uid_assoc *user; + int err = -EINVAL; int n; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; - if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) return -EINVAL; @@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; - if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) - return -EADDRNOTAVAIL; + lock_sock(sk); + + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_release; + + err = -EADDRNOTAVAIL; + dev = rose_dev_get(&addr->srose_addr); + if (!dev) + goto out_release; source = &addr->srose_call; @@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); - return -EACCES; + err = -EACCES; + goto out_release; } rose->source_call = *source; } @@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) rose_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); - - return 0; + err = 0; +out_release: + release_sock(sk); + return err; } static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 718193df9d2e..a64a0cab1bf7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -327,8 +327,8 @@ struct rxrpc_local { * packet with a maximum set of jumbo subpackets or a PING ACK padded * out to 64K with zeropages for PMTUD. */ - struct kvec kvec[RXRPC_MAX_NR_JUMBO > 3 + 16 ? - RXRPC_MAX_NR_JUMBO : 3 + 16]; + struct kvec kvec[1 + RXRPC_MAX_NR_JUMBO > 3 + 16 ? + 1 + RXRPC_MAX_NR_JUMBO : 3 + 16]; }; /* @@ -360,7 +360,6 @@ struct rxrpc_peer { u8 pmtud_jumbo; /* Max jumbo packets for the MTU */ bool ackr_adv_pmtud; /* T if the peer advertises path-MTU */ unsigned int ackr_max_data; /* Maximum data advertised by peer */ - seqcount_t mtu_lock; /* Lockless MTU access management */ unsigned int if_mtu; /* Local interface MTU (- hdrsize) for this peer */ unsigned int max_data; /* Maximum packet data capacity for this peer */ unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ @@ -582,6 +581,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */ + RXRPC_CALL_CONN_CHALLENGING, /* The connection is being challenged */ }; /* @@ -602,7 +602,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ - RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ @@ -874,8 +873,7 @@ struct rxrpc_txbuf { #define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ __be16 cksum; /* Checksum to go in header */ bool jumboable; /* Can be non-terminal jumbo subpacket */ - u8 nr_kvec; /* Amount of kvec[] used */ - struct kvec kvec[1]; + void *data; /* Data with preceding jumbo header */ }; static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 5a543c3f6fb0..c4c8b46a68c6 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", - [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", @@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->cong_tstamp = skb->tstamp; __set_bit(RXRPC_CALL_EXPOSED, &call->flags); - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags); break; case RXRPC_CONN_SERVICE: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); break; case RXRPC_CONN_ABORTED: diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 713e04394ceb..4d9c5e21ba78 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -228,10 +228,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { - if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) { - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); + if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) rxrpc_notify_socket(call); - } } /* @@ -272,6 +270,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, * we've already received the packet, put it on the * front of the queue. */ + sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured); skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); skb_queue_head(&conn->local->rx_queue, skb); @@ -437,14 +436,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) rxrpc_abort_calls(conn); - switch (skb->mark) { - case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: - if (conn->state != RXRPC_CONN_SERVICE) - break; + if (skb) { + switch (skb->mark) { + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + if (conn->state != RXRPC_CONN_SERVICE) + break; - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure(conn->channels[loop].call); - break; + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure(conn->channels[loop].call); + break; + } } /* Process delayed ACKs whose time has come. */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 7eba4d7d9a38..2f1fd1e2e7e4 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); + INIT_LIST_HEAD(&conn->attend_link); mutex_init(&conn->security_lock); mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 4974b5accafa..24aceb183c2c 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -448,11 +448,19 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); bool last = sp->hdr.flags & RXRPC_LAST_PACKET; - skb_queue_tail(&call->recvmsg_queue, skb); + spin_lock_irq(&call->recvmsg_queue.lock); + + __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); if (last) + /* Change the state inside the lock so that recvmsg syncs + * correctly with it and using sendmsg() to send a reply + * doesn't race. + */ rxrpc_end_rx_phase(call, sp->hdr.serial); + + spin_unlock_irq(&call->recvmsg_queue.lock); } /* @@ -657,7 +665,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb rxrpc_propose_delay_ACK(call, sp->hdr.serial, rxrpc_propose_ack_input_data); } - if (notify) { + if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) { trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); rxrpc_notify_socket(call); } @@ -802,9 +810,7 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb if (max_mtu < peer->max_data) { trace_rxrpc_pmtud_reduce(peer, sp->hdr.serial, max_mtu, rxrpc_pmtud_reduce_ack); - write_seqcount_begin(&peer->mtu_lock); peer->max_data = max_mtu; - write_seqcount_end(&peer->mtu_lock); } max_data = umin(max_mtu, peer->max_data); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 6f7a125d6e90..95905b85a8d7 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -428,13 +428,13 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_send_data_req *req, struct rxrpc_txbuf *txb, + struct rxrpc_wire_header *whdr, rxrpc_serial_t serial, int subpkt) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxrpc_jumbo_header *jumbo = (void *)(whdr + 1) - sizeof(*jumbo); + struct rxrpc_jumbo_header *jumbo = txb->data - sizeof(*jumbo); enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; - struct kvec *kv = &call->local->kvec[subpkt]; + struct kvec *kv = &call->local->kvec[1 + subpkt]; size_t len = txb->pkt_len; bool last; u8 flags; @@ -491,18 +491,15 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, } dont_set_request_ack: - /* The jumbo header overlays the wire header in the txbuf. */ + /* There's a jumbo header prepended to the data if we need it. */ if (subpkt < req->n - 1) flags |= RXRPC_JUMBO_PACKET; else flags &= ~RXRPC_JUMBO_PACKET; if (subpkt == 0) { whdr->flags = flags; - whdr->serial = htonl(txb->serial); whdr->cksum = txb->cksum; - whdr->serviceId = htons(conn->service_id); - kv->iov_base = whdr; - len += sizeof(*whdr); + kv->iov_base = txb->data; } else { jumbo->flags = flags; jumbo->pad = 0; @@ -535,7 +532,9 @@ static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq, /* * Prepare a (jumbo) packet for transmission. */ -static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) +static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, + struct rxrpc_send_data_req *req, + struct rxrpc_wire_header *whdr) { struct rxrpc_txqueue *tq = req->tq; rxrpc_serial_t serial; @@ -549,6 +548,18 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se /* Each transmission of a Tx packet needs a new serial number */ serial = rxrpc_get_next_serials(call->conn, req->n); + whdr->epoch = htonl(call->conn->proto.epoch); + whdr->cid = htonl(call->cid); + whdr->callNumber = htonl(call->call_id); + whdr->seq = htonl(seq); + whdr->serial = htonl(serial); + whdr->type = RXRPC_PACKET_TYPE_DATA; + whdr->flags = 0; + whdr->userStatus = 0; + whdr->securityIndex = call->security_ix; + whdr->_rsvd = 0; + whdr->serviceId = htons(call->conn->service_id); + call->tx_last_serial = serial + req->n - 1; call->tx_last_sent = req->now; xmit_ts = rxrpc_prepare_txqueue(tq, req); @@ -576,7 +587,7 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se if (i + 1 == req->n) /* Only sample the last subpacket in a jumbo. */ __set_bit(ix, &tq->rtt_samples); - len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i); + len += rxrpc_prepare_data_subpacket(call, req, txb, whdr, serial, i); serial++; seq++; i++; @@ -618,6 +629,7 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se } rxrpc_set_keepalive(call, req->now); + page_frag_free(whdr); return len; } @@ -626,25 +638,33 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se */ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) { + struct rxrpc_wire_header *whdr; struct rxrpc_connection *conn = call->conn; enum rxrpc_tx_point frag; struct rxrpc_txqueue *tq = req->tq; struct rxrpc_txbuf *txb; struct msghdr msg; rxrpc_seq_t seq = req->seq; - size_t len; + size_t len = sizeof(*whdr); bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags); int ret, stat_ix; _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1); + whdr = page_frag_alloc(&call->local->tx_alloc, sizeof(*whdr), GFP_NOFS); + if (!whdr) + return; /* Drop the packet if no memory. */ + + call->local->kvec[0].iov_base = whdr; + call->local->kvec[0].iov_len = sizeof(*whdr); + stat_ix = umin(req->n, ARRAY_SIZE(call->rxnet->stat_tx_jumbo)) - 1; atomic_inc(&call->rxnet->stat_tx_jumbo[stat_ix]); - len = rxrpc_prepare_data_packet(call, req); + len += rxrpc_prepare_data_packet(call, req, whdr); txb = tq->bufs[seq & RXRPC_TXQ_MASK]; - iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, req->n, len); + iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, 1 + req->n, len); msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -695,13 +715,13 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req if (ret == -EMSGSIZE) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize); - trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag); + trace_rxrpc_tx_packet(call->debug_id, whdr, frag); ret = 0; } else if (ret < 0) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag); } else { - trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag); + trace_rxrpc_tx_packet(call->debug_id, whdr, frag); } rxrpc_tx_backoff(call, ret); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index e874c31fa901..7f4729234957 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -130,9 +130,7 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) peer->pmtud_bad = max_data + 1; trace_rxrpc_pmtud_reduce(peer, 0, max_data, rxrpc_pmtud_reduce_icmp); - write_seqcount_begin(&peer->mtu_lock); peer->max_data = max_data; - write_seqcount_end(&peer->mtu_lock); } } @@ -169,6 +167,13 @@ void rxrpc_input_error(struct rxrpc_local *local, struct sk_buff *skb) goto out; } + if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 && + serr->ee.ee_type == ICMPV6_PKT_TOOBIG && + serr->ee.ee_code == 0)) { + rxrpc_adjust_mtu(peer, serr->ee.ee_info); + goto out; + } + rxrpc_store_error(peer, skb); out: rxrpc_put_peer(peer, rxrpc_peer_put_input_error); @@ -401,13 +406,8 @@ void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t a } max_data = umin(max_data, peer->ackr_max_data); - if (max_data != peer->max_data) { - preempt_disable(); - write_seqcount_begin(&peer->mtu_lock); + if (max_data != peer->max_data) peer->max_data = max_data; - write_seqcount_end(&peer->mtu_lock); - preempt_enable(); - } jumbo = max_data + sizeof(struct rxrpc_jumbo_header); jumbo /= RXRPC_JUMBO_SUBPKTLEN; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 0fcc87f0409f..56e09d161a97 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -235,7 +235,6 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); - seqcount_init(&peer->mtu_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); peer->recent_srtt_us = UINT_MAX; peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW; @@ -325,10 +324,10 @@ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) hash_key = rxrpc_peer_hash_key(local, &peer->srx); rxrpc_init_peer(local, peer, hash_key); - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive_new); - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); } /* diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 62b09d23ec08..6cb37b0eb77f 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -257,8 +257,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, struct rxrpc_txbuf *txb, struct skcipher_request *req) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxkad_level1_hdr *hdr = (void *)(whdr + 1); + struct rxkad_level1_hdr *hdr = txb->data; struct rxrpc_crypt iv; struct scatterlist sg; size_t pad; @@ -274,7 +273,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, pad = RXKAD_ALIGN - pad; pad &= RXKAD_ALIGN - 1; if (pad) { - memset(txb->kvec[0].iov_base + txb->offset, 0, pad); + memset(txb->data + txb->offset, 0, pad); txb->pkt_len += pad; } @@ -300,8 +299,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct skcipher_request *req) { const struct rxrpc_key_token *token; - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxkad_level2_hdr *rxkhdr = (void *)(whdr + 1); + struct rxkad_level2_hdr *rxkhdr = txb->data; struct rxrpc_crypt iv; struct scatterlist sg; size_t content, pad; @@ -319,7 +317,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, txb->pkt_len = round_up(content, RXKAD_ALIGN); pad = txb->pkt_len - content; if (pad) - memset(txb->kvec[0].iov_base + txb->offset, 0, pad); + memset(txb->data + txb->offset, 0, pad); /* encrypt from the session key */ token = call->conn->key->payload.data[0]; @@ -407,9 +405,8 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) /* Clear excess space in the packet */ if (txb->pkt_len < txb->alloc_size) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; size_t gap = txb->alloc_size - txb->pkt_len; - void *p = whdr + 1; + void *p = txb->data; memset(p + txb->pkt_len, 0, gap); } diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c index 7ef93407be83..e848a4777b8c 100644 --- a/net/rxrpc/rxperf.c +++ b/net/rxrpc/rxperf.c @@ -478,6 +478,18 @@ static int rxperf_deliver_request(struct rxperf_call *call) call->unmarshal++; fallthrough; case 2: + ret = rxperf_extract_data(call, true); + if (ret < 0) + return ret; + + /* Deal with the terminal magic cookie. */ + call->iov_len = 4; + call->kvec[0].iov_len = call->iov_len; + call->kvec[0].iov_base = call->tmp; + iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len); + call->unmarshal++; + fallthrough; + case 3: ret = rxperf_extract_data(call, false); if (ret < 0) return ret; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0e8da909d4f2..84dc6c94f23b 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -419,7 +419,7 @@ reload: size_t copy = umin(txb->space, msg_data_left(msg)); _debug("add %zu", copy); - if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset, + if (!copy_from_iter_full(txb->data + txb->offset, copy, &msg->msg_iter)) goto efault; _debug("added"); @@ -445,8 +445,6 @@ reload: ret = call->security->secure_packet(call, txb); if (ret < 0) goto out; - - txb->kvec[0].iov_len += txb->len; rxrpc_queue_packet(rx, call, txb, notify_end_tx); txb = NULL; } @@ -707,7 +705,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else { switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_AWAIT_CONN: - case RXRPC_CALL_SERVER_SECURING: + case RXRPC_CALL_SERVER_RECV_REQUEST: if (p.command == RXRPC_CMD_SEND_ABORT) break; fallthrough; diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 131d9e55c8e9..c550991d48fa 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -19,17 +19,19 @@ atomic_t rxrpc_nr_txbuf; struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size, size_t data_align, gfp_t gfp) { - struct rxrpc_wire_header *whdr; struct rxrpc_txbuf *txb; - size_t total, hoff; + size_t total, doff, jsize = sizeof(struct rxrpc_jumbo_header); void *buf; txb = kzalloc(sizeof(*txb), gfp); if (!txb) return NULL; - hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); - total = hoff + sizeof(*whdr) + data_size; + /* We put a jumbo header in the buffer, but not a full wire header to + * avoid delayed-corruption problems with zerocopy. + */ + doff = round_up(jsize, data_align); + total = doff + data_size; data_align = umax(data_align, L1_CACHE_BYTES); mutex_lock(&call->conn->tx_data_alloc_lock); @@ -41,30 +43,15 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ return NULL; } - whdr = buf + hoff; - refcount_set(&txb->ref, 1); txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); txb->alloc_size = data_size; txb->space = data_size; - txb->offset = sizeof(*whdr); + txb->offset = 0; txb->flags = call->conn->out_clientflag; txb->seq = call->send_top + 1; - txb->nr_kvec = 1; - txb->kvec[0].iov_base = whdr; - txb->kvec[0].iov_len = sizeof(*whdr); - - whdr->epoch = htonl(call->conn->proto.epoch); - whdr->cid = htonl(call->cid); - whdr->callNumber = htonl(call->call_id); - whdr->seq = htonl(txb->seq); - whdr->type = RXRPC_PACKET_TYPE_DATA; - whdr->flags = 0; - whdr->userStatus = 0; - whdr->securityIndex = call->security_ix; - whdr->_rsvd = 0; - whdr->serviceId = htons(call->dest_srx.srx_service); + txb->data = buf + doff; trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, rxrpc_txbuf_alloc_data); @@ -90,14 +77,10 @@ void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb) { - int i; - trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0, rxrpc_txbuf_free); - for (i = 0; i < txb->nr_kvec; i++) - if (txb->kvec[i].iov_base && - !is_zero_pfn(page_to_pfn(virt_to_page(txb->kvec[i].iov_base)))) - page_frag_free(txb->kvec[i].iov_base); + if (txb->data) + page_frag_free(txb->data); kfree(txb); atomic_dec(&rxrpc_nr_txbuf); } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8e47e5355be6..4f648af8cfaa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -97,7 +97,7 @@ tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp, err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base, n, xa_limit_32b, &next, GFP_KERNEL); - if (err) + if (err < 0) goto err_xa_alloc; exts->miss_cookie_node = n; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b50b2c2cc09b..e6bfd39ff339 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; + if (unlikely(READ_ONCE(sch->limit) == 0)) + return qdisc_drop(skb, sch, to_free); + if (likely(sch->q.qlen < READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 71ec9986ed37..fdd79d3ccd8c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -749,9 +749,9 @@ deliver: if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, pkt_len); sch->qstats.backlog -= pkt_len; sch->q.qlen--; + qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } diff --git a/net/sctp/stream.c b/net/sctp/stream.c index c241cc552e8d..bfcff6d6a438 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -735,7 +735,7 @@ struct sctp_chunk *sctp_process_strreset_tsnreq( * value SHOULD be the smallest TSN not acknowledged by the * receiver of the request plus 2^31. */ - init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1 << 31); + init_tsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map) + (1U << 31); sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, init_tsn, GFP_ATOMIC); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index ca6984541edb..3e6cb35baf25 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3337,10 +3337,7 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family) * which need net ref. */ sk = smc->clcsock->sk; - __netns_tracker_free(net, &sk->ns_tracker, false); - sk->sk_net_refcnt = 1; - get_net_track(net, &sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sk); return 0; } diff --git a/net/socket.c b/net/socket.c index 262a28b59c7f..28bae5a94234 100644 --- a/net/socket.c +++ b/net/socket.c @@ -479,6 +479,11 @@ struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) sock->file = file; file->private_data = sock; stream_open(SOCK_INODE(sock), file); + /* + * Disable permission and pre-content events, but enable legacy + * inotify events for legacy users. + */ + file_set_fsnotify_mode(file, FMODE_NONOTIFY_PERM); return file; } EXPORT_SYMBOL(sock_alloc_file); diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 8299ceb3e373..95696f42647e 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -347,7 +347,10 @@ static int strp_read_sock(struct strparser *strp) struct socket *sock = strp->sk->sk_socket; read_descriptor_t desc; - if (unlikely(!sock || !sock->ops || !sock->ops->read_sock)) + if (unlikely(!sock || !sock->ops)) + return -EBUSY; + + if (unlikely(!strp->cb.read_sock && !sock->ops->read_sock)) return -EBUSY; desc.arg.data = strp; @@ -355,7 +358,10 @@ static int strp_read_sock(struct strparser *strp) desc.count = 1; /* give more than one skb per call */ /* sk should be locked here, so okay to do read_sock */ - sock->ops->read_sock(strp->sk, &desc, strp_recv); + if (strp->cb.read_sock) + strp->cb.read_sock(strp, &desc, strp_recv); + else + sock->ops->read_sock(strp->sk, &desc, strp_recv); desc.error = strp->cb.read_sock_done(strp, desc.error); @@ -468,6 +474,7 @@ int strp_init(struct strparser *strp, struct sock *sk, strp->cb.unlock = cb->unlock ? : strp_sock_unlock; strp->cb.rcv_msg = cb->rcv_msg; strp->cb.parse_msg = cb->parse_msg; + strp->cb.read_sock = cb->read_sock; strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index cb279eb9ac4b..7ce5e28a6c03 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1674,12 +1674,14 @@ static void remove_cache_proc_entries(struct cache_detail *cd) } } -#ifdef CONFIG_PROC_FS static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) { struct proc_dir_entry *p; struct sunrpc_net *sn; + if (!IS_ENABLED(CONFIG_PROC_FS)) + return 0; + sn = net_generic(net, sunrpc_net_id); cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc); if (cd->procfs == NULL) @@ -1707,12 +1709,6 @@ out_nomem: remove_cache_proc_entries(cd); return -ENOMEM; } -#else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) -{ - return 0; -} -#endif void __init cache_initialize(void) { diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cef623ea1506..9b45fbdc90ca 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -864,8 +864,6 @@ void rpc_signal_task(struct rpc_task *task) if (!rpc_task_set_rpc_status(task, -ERESTARTSYS)) return; trace_rpc_task_signalled(task, task->tk_action); - set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate); - smp_mb__after_atomic(); queue = READ_ONCE(task->tk_waitqueue); if (queue) rpc_wake_up_queued_task(queue, task); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cb3bd12f5818..72e5a01df3d3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1541,10 +1541,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, newlen = error; if (protocol == IPPROTO_TCP) { - __netns_tracker_free(net, &sock->sk->ns_tracker, false); - sock->sk->sk_net_refcnt = 1; - get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(net, 1); + sk_net_refcnt_upgrade(sock->sk); if ((error = kernel_listen(sock, 64)) < 0) goto bummer; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index c60936d8cef7..83cc095846d3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1941,12 +1941,8 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt, goto out; } - if (protocol == IPPROTO_TCP) { - __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false); - sock->sk->sk_net_refcnt = 1; - get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL); - sock_inuse_add(xprt->xprt_net, 1); - } + if (protocol == IPPROTO_TCP) + sk_net_refcnt_upgrade(sock->sk); filp = sock_alloc_file(sock, O_NONBLOCK, NULL); if (IS_ERR(filp)) @@ -2581,7 +2577,15 @@ static void xs_tls_handshake_done(void *data, int status, key_serial_t peerid) struct sock_xprt *lower_transport = container_of(lower_xprt, struct sock_xprt, xprt); - lower_transport->xprt_err = status ? -EACCES : 0; + switch (status) { + case 0: + case -EACCES: + case -ETIMEDOUT: + lower_transport->xprt_err = status; + break; + default: + lower_transport->xprt_err = -EACCES; + } complete(&lower_transport->handshake_done); xprt_put(lower_xprt); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 34945de1fb1f..f0e613d97664 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2102,6 +2102,7 @@ restart_locked: goto out_sock_put; } + sock_put(other); goto lookup; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 075695173648..7e3db87ae433 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -824,13 +824,19 @@ static void __vsock_release(struct sock *sk, int level) */ lock_sock_nested(sk, level); - sock_orphan(sk); + /* Indicate to vsock_remove_sock() that the socket is being released and + * can be removed from the bound_table. Unlike transport reassignment + * case, where the socket must remain bound despite vsock_remove_sock() + * being called from the transport release() callback. + */ + sock_set_flag(sk, SOCK_DEAD); if (vsk->transport) vsk->transport->release(vsk); else if (sock_type_connectible(sk->sk_type)) vsock_remove_sock(vsk); + sock_orphan(sk); sk->sk_shutdown = SHUTDOWN_MASK; skb_queue_purge(&sk->sk_receive_queue); @@ -1183,6 +1189,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor) { struct vsock_sock *vsk = vsock_sk(sk); + if (WARN_ON_ONCE(!vsk->transport)) + return -ENODEV; + return vsk->transport->read_skb(vsk, read_actor); } diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index b58c3818f284..f0e48e6911fc 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -670,6 +670,13 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock) }; int ret; + mutex_lock(&vsock->rx_lock); + vsock->rx_buf_nr = 0; + vsock->rx_buf_max_nr = 0; + mutex_unlock(&vsock->rx_lock); + + atomic_set(&vsock->queued_replies, 0); + ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL); if (ret < 0) return ret; @@ -779,9 +786,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev) vsock->vdev = vdev; - vsock->rx_buf_nr = 0; - vsock->rx_buf_max_nr = 0; - atomic_set(&vsock->queued_replies, 0); mutex_init(&vsock->tx_lock); mutex_init(&vsock->rx_lock); diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index f201d9eca1df..07b96d56f3a5 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -87,7 +87,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, lock_sock(sk); vsk = vsock_sk(sk); - if (!vsk->transport) { + if (WARN_ON_ONCE(!vsk->transport)) { copied = -ENODEV; goto out; } diff --git a/rust/Makefile b/rust/Makefile index 8fcfd60447bc..ea3849eb78f6 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -144,7 +144,7 @@ rusttestlib-kernel: private rustc_target_flags = --extern ffi \ --extern bindings --extern uapi rusttestlib-kernel: $(src)/kernel/lib.rs \ rusttestlib-bindings rusttestlib-uapi rusttestlib-build_error \ - $(obj)/libmacros.so $(obj)/bindings.o FORCE + $(obj)/$(libmacros_name) $(obj)/bindings.o FORCE +$(call if_changed,rustc_test_library) rusttestlib-bindings: private rustc_target_flags = --extern ffi @@ -240,6 +240,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ -fstrict-flex-arrays=% -fmin-function-alignment=% \ + -fzero-init-padding-bits=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. @@ -331,7 +332,7 @@ $(obj)/bindings/bindings_helpers_generated.rs: private bindgen_target_extra = ; $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers/helpers.c FORCE $(call if_changed_dep,bindgen) -rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ { printf $(2),$$3 }' +rust_exports = $(NM) -p --defined-only $(1) | awk '$$2~/(T|R|D|B)/ && $$3!~/__cfi/ && $$3!~/__odr_asan/ { printf $(2),$$3 }' quiet_cmd_exports = EXPORTS $@ cmd_exports = \ diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index 55354e4dec14..f46cf3bb7069 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -11,6 +11,7 @@ #include <linux/blk_types.h> #include <linux/blkdev.h> #include <linux/cred.h> +#include <linux/device/faux.h> #include <linux/errname.h> #include <linux/ethtool.h> #include <linux/file.h> diff --git a/rust/kernel/faux.rs b/rust/kernel/faux.rs new file mode 100644 index 000000000000..5acc0c02d451 --- /dev/null +++ b/rust/kernel/faux.rs @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only + +//! Abstractions for the faux bus. +//! +//! This module provides bindings for working with faux devices in kernel modules. +//! +//! C header: [`include/linux/device/faux.h`] + +use crate::{bindings, device, error::code::*, prelude::*}; +use core::ptr::{addr_of_mut, null, null_mut, NonNull}; + +/// The registration of a faux device. +/// +/// This type represents the registration of a [`struct faux_device`]. When an instance of this type +/// is dropped, its respective faux device will be unregistered from the system. +/// +/// # Invariants +/// +/// `self.0` always holds a valid pointer to an initialized and registered [`struct faux_device`]. +/// +/// [`struct faux_device`]: srctree/include/linux/device/faux.h +#[repr(transparent)] +pub struct Registration(NonNull<bindings::faux_device>); + +impl Registration { + /// Create and register a new faux device with the given name. + pub fn new(name: &CStr) -> Result<Self> { + // SAFETY: + // - `name` is copied by this function into its own storage + // - `faux_ops` is safe to leave NULL according to the C API + let dev = unsafe { bindings::faux_device_create(name.as_char_ptr(), null_mut(), null()) }; + + // The above function will return either a valid device, or NULL on failure + // INVARIANT: The device will remain registered until faux_device_destroy() is called, which + // happens in our Drop implementation. + Ok(Self(NonNull::new(dev).ok_or(ENODEV)?)) + } + + fn as_raw(&self) -> *mut bindings::faux_device { + self.0.as_ptr() + } +} + +impl AsRef<device::Device> for Registration { + fn as_ref(&self) -> &device::Device { + // SAFETY: The underlying `device` in `faux_device` is guaranteed by the C API to be + // a valid initialized `device`. + unsafe { device::Device::as_ref(addr_of_mut!((*self.as_raw()).dev)) } + } +} + +impl Drop for Registration { + fn drop(&mut self) { + // SAFETY: `self.0` is a valid registered faux_device via our type invariants. + unsafe { bindings::faux_device_destroy(self.as_raw()) } + } +} + +// SAFETY: The faux device API is thread-safe as guaranteed by the device core, as long as +// faux_device_destroy() is guaranteed to only be called once - which is guaranteed by our type not +// having Copy/Clone. +unsafe impl Send for Registration {} + +// SAFETY: The faux device API is thread-safe as guaranteed by the device core, as long as +// faux_device_destroy() is guaranteed to only be called once - which is guaranteed by our type not +// having Copy/Clone. +unsafe impl Sync for Registration {} diff --git a/rust/kernel/init.rs b/rust/kernel/init.rs index 3f9236c1c9d5..7fd1ea8265a5 100644 --- a/rust/kernel/init.rs +++ b/rust/kernel/init.rs @@ -870,7 +870,7 @@ pub unsafe trait PinInit<T: ?Sized, E = Infallible>: Sized { /// use kernel::{types::Opaque, init::pin_init_from_closure}; /// #[repr(C)] /// struct RawFoo([u8; 16]); - /// extern { + /// extern "C" { /// fn init_foo(_: *mut RawFoo); /// } /// diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 496ed32b0911..398242f92a96 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -46,6 +46,7 @@ pub mod device_id; pub mod devres; pub mod driver; pub mod error; +pub mod faux; #[cfg(CONFIG_RUST_FW_LOADER_ABSTRACTIONS)] pub mod firmware; pub mod fs; diff --git a/rust/kernel/rbtree.rs b/rust/kernel/rbtree.rs index ee2731dad72d..0d1e75810664 100644 --- a/rust/kernel/rbtree.rs +++ b/rust/kernel/rbtree.rs @@ -1149,7 +1149,7 @@ pub struct VacantEntry<'a, K, V> { /// # Invariants /// - `parent` may be null if the new node becomes the root. /// - `child_field_of_parent` is a valid pointer to the left-child or right-child of `parent`. If `parent` is -/// null, it is a pointer to the root of the [`RBTree`]. +/// null, it is a pointer to the root of the [`RBTree`]. struct RawVacantEntry<'a, K, V> { rbtree: *mut RBTree<K, V>, /// The node that will become the parent of the new node if we insert one. diff --git a/samples/hid/Makefile b/samples/hid/Makefile index 8ea59e9631a3..db5a077c77fc 100644 --- a/samples/hid/Makefile +++ b/samples/hid/Makefile @@ -40,16 +40,17 @@ BPF_EXTRA_CFLAGS += -I$(srctree)/arch/mips/include/asm/mach-generic endif endif -TPROGS_CFLAGS += -Wall -O2 -TPROGS_CFLAGS += -Wmissing-prototypes -TPROGS_CFLAGS += -Wstrict-prototypes +COMMON_CFLAGS += -Wall -O2 +COMMON_CFLAGS += -Wmissing-prototypes +COMMON_CFLAGS += -Wstrict-prototypes +TPROGS_CFLAGS += $(COMMON_CFLAGS) TPROGS_CFLAGS += -I$(objtree)/usr/include TPROGS_CFLAGS += -I$(LIBBPF_INCLUDE) TPROGS_CFLAGS += -I$(srctree)/tools/include ifdef SYSROOT -TPROGS_CFLAGS += --sysroot=$(SYSROOT) +COMMON_CFLAGS += --sysroot=$(SYSROOT) TPROGS_LDFLAGS := -L$(SYSROOT)/usr/lib endif @@ -112,7 +113,7 @@ clean: $(LIBBPF): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) # Fix up variables inherited from Kbuild that tools/ build system won't like - $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(TPROGS_CFLAGS)" \ + $(MAKE) -C $(LIBBPF_SRC) RM='rm -rf' EXTRA_CFLAGS="$(COMMON_CFLAGS)" \ LDFLAGS=$(TPROGS_LDFLAGS) srctree=$(HID_SAMPLES_PATH)/../../ \ O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \ $@ install_headers @@ -163,7 +164,7 @@ $(obj)/hid_surface_dial.o: $(obj)/hid_surface_dial.skel.h VMLINUX_BTF_PATHS ?= $(abspath $(if $(O),$(O)/vmlinux)) \ $(abspath $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)) \ - $(abspath ./vmlinux) + $(abspath $(objtree)/vmlinux) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(obj)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) diff --git a/samples/rust/Kconfig b/samples/rust/Kconfig index 918dbead2c0b..3b6eae84b297 100644 --- a/samples/rust/Kconfig +++ b/samples/rust/Kconfig @@ -61,6 +61,16 @@ config SAMPLE_RUST_DRIVER_PLATFORM If unsure, say N. +config SAMPLE_RUST_DRIVER_FAUX + tristate "Faux Driver" + help + This option builds the Rust Faux driver sample. + + To compile this as a module, choose M here: + the module will be called rust_driver_faux. + + If unsure, say N. + config SAMPLE_RUST_HOSTPROGS bool "Host programs" help diff --git a/samples/rust/Makefile b/samples/rust/Makefile index 5a8ab0df0567..0dbc6d90f1ef 100644 --- a/samples/rust/Makefile +++ b/samples/rust/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_SAMPLE_RUST_MISC_DEVICE) += rust_misc_device.o obj-$(CONFIG_SAMPLE_RUST_PRINT) += rust_print.o obj-$(CONFIG_SAMPLE_RUST_DRIVER_PCI) += rust_driver_pci.o obj-$(CONFIG_SAMPLE_RUST_DRIVER_PLATFORM) += rust_driver_platform.o +obj-$(CONFIG_SAMPLE_RUST_DRIVER_FAUX) += rust_driver_faux.o rust_print-y := rust_print_main.o rust_print_events.o diff --git a/samples/rust/rust_driver_faux.rs b/samples/rust/rust_driver_faux.rs new file mode 100644 index 000000000000..048c6cb98b29 --- /dev/null +++ b/samples/rust/rust_driver_faux.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0-only + +//! Rust faux device sample. + +use kernel::{c_str, faux, prelude::*, Module}; + +module! { + type: SampleModule, + name: "rust_faux_driver", + author: "Lyude Paul", + description: "Rust faux device sample", + license: "GPL", +} + +struct SampleModule { + _reg: faux::Registration, +} + +impl Module for SampleModule { + fn init(_module: &'static ThisModule) -> Result<Self> { + pr_info!("Initialising Rust Faux Device Sample\n"); + + let reg = faux::Registration::new(c_str!("rust-faux-sample-device"))?; + + dev_info!(reg.as_ref(), "Hello from faux device!\n"); + + Ok(Self { _reg: reg }) + } +} diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index eb719f6d8d53..dc081cf46d21 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -31,6 +31,11 @@ KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds ifdef CONFIG_CC_IS_CLANG # The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable. KBUILD_CFLAGS += -Wno-gnu + +# Clang checks for overflow/truncation with '%p', while GCC does not: +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 +KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf) +KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf) else # gcc inanely warns about local variables called 'main' @@ -105,11 +110,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned) KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation) -else -# Clang checks for overflow/truncation with '%p', while GCC does not: -# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111219 -KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow-non-kprintf) -KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation-non-kprintf) endif KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation) @@ -133,7 +133,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) KBUILD_CFLAGS += -Wno-enum-compare-conditional -KBUILD_CFLAGS += -Wno-enum-enum-conversion endif endif @@ -157,6 +156,10 @@ KBUILD_CFLAGS += -Wno-missing-field-initializers KBUILD_CFLAGS += -Wno-type-limits KBUILD_CFLAGS += -Wno-shift-negative-value +ifdef CONFIG_CC_IS_CLANG +KBUILD_CFLAGS += -Wno-enum-enum-conversion +endif + ifdef CONFIG_CC_IS_GCC KBUILD_CFLAGS += -Wno-maybe-uninitialized endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index ad55ef201aac..cad20f0e66ee 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -305,7 +305,7 @@ endef # These are shared by some Makefile.* files. ifdef CONFIG_LTO_CLANG -# Run $(LD) here to covert LLVM IR to ELF in the following cases: +# Run $(LD) here to convert LLVM IR to ELF in the following cases: # - when this object needs objtool processing, as objtool cannot process LLVM IR # - when this is a single-object module, as modpost cannot process LLVM IR cmd_ld_single = $(if $(objtool-enabled)$(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@) diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 0d00ac3723b5..4fd6b6ab3e32 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -165,6 +165,18 @@ impl KernelConfig { let option = "CONFIG_".to_owned() + option; self.0.contains_key(&option) } + + /// Is the rustc version at least `major.minor.patch`? + fn rustc_version_atleast(&self, major: u32, minor: u32, patch: u32) -> bool { + let check_version = 100000 * major + 100 * minor + patch; + let actual_version = self + .0 + .get("CONFIG_RUSTC_VERSION") + .unwrap() + .parse::<u32>() + .unwrap(); + check_version <= actual_version + } } fn main() { @@ -182,6 +194,9 @@ fn main() { } } else if cfg.has("X86_64") { ts.push("arch", "x86_64"); + if cfg.rustc_version_atleast(1, 86, 0) { + ts.push("rustc-abi", "x86-softfloat"); + } ts.push( "data-layout", "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", @@ -215,6 +230,9 @@ fn main() { panic!("32-bit x86 only works under UML"); } ts.push("arch", "x86"); + if cfg.rustc_version_atleast(1, 86, 0) { + ts.push("rustc-abi", "x86-softfloat"); + } ts.push( "data-layout", "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128", diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index e18ae7dc8140..c35d22607978 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -190,8 +190,8 @@ static struct module *new_module(const char *name, size_t namelen) /* * Set mod->is_gpl_compatible to true by default. If MODULE_LICENSE() - * is missing, do not check the use for EXPORT_SYMBOL_GPL() becasue - * modpost will exit wiht error anyway. + * is missing, do not check the use for EXPORT_SYMBOL_GPL() because + * modpost will exit with an error anyway. */ mod->is_gpl_compatible = true; @@ -507,6 +507,9 @@ static int parse_elf(struct elf_info *info, const char *filename) info->modinfo_len = sechdrs[i].sh_size; } else if (!strcmp(secname, ".export_symbol")) { info->export_symbol_secndx = i; + } else if (!strcmp(secname, ".no_trim_symbol")) { + info->no_trim_symbol = (void *)hdr + sechdrs[i].sh_offset; + info->no_trim_symbol_len = sechdrs[i].sh_size; } if (sechdrs[i].sh_type == SHT_SYMTAB) { @@ -1566,6 +1569,14 @@ static void read_symbols(const char *modname) /* strip trailing .o */ mod = new_module(modname, strlen(modname) - strlen(".o")); + /* save .no_trim_symbol section for later use */ + if (info.no_trim_symbol_len) { + mod->no_trim_symbol = xmalloc(info.no_trim_symbol_len); + memcpy(mod->no_trim_symbol, info.no_trim_symbol, + info.no_trim_symbol_len); + mod->no_trim_symbol_len = info.no_trim_symbol_len; + } + if (!mod->is_vmlinux) { license = get_modinfo(&info, "license"); if (!license) @@ -1728,6 +1739,28 @@ static void handle_white_list_exports(const char *white_list) free(buf); } +/* + * Keep symbols recorded in the .no_trim_symbol section. This is necessary to + * prevent CONFIG_TRIM_UNUSED_KSYMS from dropping EXPORT_SYMBOL because + * symbol_get() relies on the symbol being present in the ksymtab for lookups. + */ +static void keep_no_trim_symbols(struct module *mod) +{ + unsigned long size = mod->no_trim_symbol_len; + + for (char *s = mod->no_trim_symbol; s; s = next_string(s , &size)) { + struct symbol *sym; + + /* + * If find_symbol() returns NULL, this symbol is not provided + * by any module, and symbol_get() will fail. + */ + sym = find_symbol(s); + if (sym) + sym->used = true; + } +} + static void check_modname_len(struct module *mod) { const char *mod_name; @@ -2254,6 +2287,8 @@ int main(int argc, char **argv) read_symbols_from_files(files_source); list_for_each_entry(mod, &modules, list) { + keep_no_trim_symbols(mod); + if (mod->dump_file || mod->is_vmlinux) continue; diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index ffd0a52a606e..59366f456b76 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -111,6 +111,8 @@ struct module_alias { * * @dump_file: path to the .symvers file if loaded from a file * @aliases: list head for module_aliases + * @no_trim_symbol: .no_trim_symbol section data + * @no_trim_symbol_len: length of the .no_trim_symbol section */ struct module { struct list_head list; @@ -128,6 +130,8 @@ struct module { // Actual imported namespaces struct list_head imported_namespaces; struct list_head aliases; + char *no_trim_symbol; + unsigned int no_trim_symbol_len; char name[]; }; @@ -141,6 +145,8 @@ struct elf_info { char *strtab; char *modinfo; unsigned int modinfo_len; + char *no_trim_symbol; + unsigned int no_trim_symbol_len; /* support for 32bit section numbers */ diff --git a/scripts/module.lds.S b/scripts/module.lds.S index c2f80f9141d4..450f1088d5fd 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -16,6 +16,7 @@ SECTIONS { *(.discard) *(.discard.*) *(.export_symbol) + *(.no_trim_symbol) } __ksymtab 0 : ALIGN(8) { *(SORT(___ksymtab+*)) } diff --git a/scripts/package/install-extmod-build b/scripts/package/install-extmod-build index bb6e23c1174e..2966473b4660 100755 --- a/scripts/package/install-extmod-build +++ b/scripts/package/install-extmod-build @@ -62,8 +62,8 @@ if [ "${CC}" != "${HOSTCC}" ]; then # # Clear VPATH and srcroot because the source files reside in the output # directory. - # shellcheck disable=SC2016 # $(MAKE), $(CC), and $(build) will be expanded by Make - "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC=$(CC) VPATH= srcroot=. $(build)='"${destdir}"/scripts + # shellcheck disable=SC2016 # $(MAKE) and $(build) will be expanded by Make + "${MAKE}" run-command KBUILD_RUN_COMMAND='+$(MAKE) HOSTCC='"${CC}"' VPATH= srcroot=. $(build)='"${destdir}"/scripts rm -f "${destdir}/scripts/Kbuild" fi diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 7c06ffd633d2..a5e730ffda57 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -180,7 +180,7 @@ static void hmac_add_misc(struct shash_desc *desc, struct inode *inode, } /* - * Dump large security xattr values as a continuous ascii hexademical string. + * Dump large security xattr values as a continuous ascii hexadecimal string. * (pr_debug is limited to 64 bytes.) */ static void dump_security_xattr_l(const char *prefix, const void *src, diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 377e57e9084f..0add782e73ba 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -169,7 +169,7 @@ static int is_unsupported_hmac_fs(struct dentry *dentry) * and compare it against the stored security.evm xattr. * * For performance: - * - use the previoulsy retrieved xattr value and length to calculate the + * - use the previously retrieved xattr value and length to calculate the * HMAC.) * - cache the verification result in the iint, when available. * diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 24d09ea91b87..a4f284bd846c 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -149,6 +149,9 @@ struct ima_kexec_hdr { #define IMA_CHECK_BLACKLIST 0x40000000 #define IMA_VERITY_REQUIRED 0x80000000 +/* Exclude non-action flags which are not rule-specific. */ +#define IMA_NONACTION_RULE_FLAGS (IMA_NONACTION_FLAGS & ~IMA_NEW_FILE) + #define IMA_DO_MASK (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \ IMA_HASH | IMA_APPRAISE_SUBMASK) #define IMA_DONE_MASK (IMA_MEASURED | IMA_APPRAISED | IMA_AUDITED | \ diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 9f9897a7c217..28b8b0db6f9b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -269,10 +269,13 @@ static int process_measurement(struct file *file, const struct cred *cred, mutex_lock(&iint->mutex); if (test_and_clear_bit(IMA_CHANGE_ATTR, &iint->atomic_flags)) - /* reset appraisal flags if ima_inode_post_setattr was called */ + /* + * Reset appraisal flags (action and non-action rule-specific) + * if ima_inode_post_setattr was called. + */ iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED | IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK | - IMA_NONACTION_FLAGS); + IMA_NONACTION_RULE_FLAGS); /* * Re-evaulate the file if either the xattr has changed or the @@ -1011,9 +1014,9 @@ int process_buffer_measurement(struct mnt_idmap *idmap, } /* - * Both LSM hooks and auxilary based buffer measurements are - * based on policy. To avoid code duplication, differentiate - * between the LSM hooks and auxilary buffer measurements, + * Both LSM hooks and auxiliary based buffer measurements are + * based on policy. To avoid code duplication, differentiate + * between the LSM hooks and auxiliary buffer measurements, * retrieving the policy rule information only for the LSM hook * buffer measurements. */ diff --git a/security/landlock/net.c b/security/landlock/net.c index d5dcc4407a19..104b6c01fe50 100644 --- a/security/landlock/net.c +++ b/security/landlock/net.c @@ -63,8 +63,7 @@ static int current_check_access_socket(struct socket *const sock, if (WARN_ON_ONCE(dom->num_layers < 1)) return -EACCES; - /* Checks if it's a (potential) TCP socket. */ - if (sock->type != SOCK_STREAM) + if (!sk_is_tcp(sock->sk)) return 0; /* Checks for minimal header length to safely read sa_family. */ diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c index 241ce44375b6..bff4e40a3093 100644 --- a/security/landlock/ruleset.c +++ b/security/landlock/ruleset.c @@ -124,7 +124,7 @@ create_rule(const struct landlock_id id, return ERR_PTR(-ENOMEM); RB_CLEAR_NODE(&new_rule->node); if (is_object_pointer(id.type)) { - /* This should be catched by insert_rule(). */ + /* This should have been caught by insert_rule(). */ WARN_ON_ONCE(!id.key.object); landlock_get_object(id.key.object); } diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index d9fa69632147..0f78898bce09 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -1981,6 +1981,114 @@ static int tomoyo_truncate(char *str) } /** + * tomoyo_numscan - sscanf() which stores the length of a decimal integer value. + * + * @str: String to scan. + * @head: Leading string that must start with. + * @width: Pointer to "int" for storing length of a decimal integer value after @head. + * @tail: Optional character that must match after a decimal integer value. + * + * Returns whether @str starts with @head and a decimal value follows @head. + */ +static bool tomoyo_numscan(const char *str, const char *head, int *width, const char tail) +{ + const char *cp; + const int n = strlen(head); + + if (!strncmp(str, head, n)) { + cp = str + n; + while (*cp && *cp >= '0' && *cp <= '9') + cp++; + if (*cp == tail || !tail) { + *width = cp - (str + n); + return *width != 0; + } + } + *width = 0; + return 0; +} + +/** + * tomoyo_patternize_path - Make patterns for file path. Used by learning mode. + * + * @buffer: Destination buffer. + * @len: Size of @buffer. + * @entry: Original line. + * + * Returns nothing. + */ +static void tomoyo_patternize_path(char *buffer, const int len, char *entry) +{ + int width; + char *cp = entry; + + /* Nothing to do if this line is not for "file" related entry. */ + if (strncmp(entry, "file ", 5)) + goto flush; + /* + * Nothing to do if there is no colon in this line, for this rewriting + * applies to only filesystems where numeric values in the path are volatile. + */ + cp = strchr(entry + 5, ':'); + if (!cp) { + cp = entry; + goto flush; + } + /* Flush e.g. "file ioctl" part. */ + while (*cp != ' ') + cp--; + *cp++ = '\0'; + tomoyo_addprintf(buffer, len, "%s ", entry); + /* e.g. file ioctl pipe:[$INO] $CMD */ + if (tomoyo_numscan(cp, "pipe:[", &width, ']')) { + cp += width + 7; + tomoyo_addprintf(buffer, len, "pipe:[\\$]"); + goto flush; + } + /* e.g. file ioctl socket:[$INO] $CMD */ + if (tomoyo_numscan(cp, "socket:[", &width, ']')) { + cp += width + 9; + tomoyo_addprintf(buffer, len, "socket:[\\$]"); + goto flush; + } + if (!strncmp(cp, "proc:/self", 10)) { + /* e.g. file read proc:/self/task/$TID/fdinfo/$FD */ + cp += 10; + tomoyo_addprintf(buffer, len, "proc:/self"); + } else if (tomoyo_numscan(cp, "proc:/", &width, 0)) { + /* e.g. file read proc:/$PID/task/$TID/fdinfo/$FD */ + /* + * Don't patternize $PID part if $PID == 1, for several + * programs access only files in /proc/1/ directory. + */ + cp += width + 6; + if (width == 1 && *(cp - 1) == '1') + tomoyo_addprintf(buffer, len, "proc:/1"); + else + tomoyo_addprintf(buffer, len, "proc:/\\$"); + } else { + goto flush; + } + /* Patternize $TID part if "/task/" follows. */ + if (tomoyo_numscan(cp, "/task/", &width, 0)) { + cp += width + 6; + tomoyo_addprintf(buffer, len, "/task/\\$"); + } + /* Patternize $FD part if "/fd/" or "/fdinfo/" follows. */ + if (tomoyo_numscan(cp, "/fd/", &width, 0)) { + cp += width + 4; + tomoyo_addprintf(buffer, len, "/fd/\\$"); + } else if (tomoyo_numscan(cp, "/fdinfo/", &width, 0)) { + cp += width + 8; + tomoyo_addprintf(buffer, len, "/fdinfo/\\$"); + } +flush: + /* Flush remaining part if any. */ + if (*cp) + tomoyo_addprintf(buffer, len, "%s", cp); +} + +/** * tomoyo_add_entry - Add an ACL to current thread's domain. Used by learning mode. * * @domain: Pointer to "struct tomoyo_domain_info". @@ -2003,7 +2111,8 @@ static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header) if (!cp) return; *cp++ = '\0'; - len = strlen(cp) + 1; + /* Reserve some space for potentially using patterns. */ + len = strlen(cp) + 16; /* strstr() will return NULL if ordering is wrong. */ if (*cp == 'f') { argv0 = strstr(header, " argv[]={ \""); @@ -2020,40 +2129,10 @@ static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header) if (symlink) len += tomoyo_truncate(symlink + 1) + 1; } - buffer = kmalloc(len, GFP_NOFS); + buffer = kmalloc(len, GFP_NOFS | __GFP_ZERO); if (!buffer) return; - snprintf(buffer, len - 1, "%s", cp); - if (*cp == 'f' && strchr(buffer, ':')) { - /* Automatically replace 2 or more digits with \$ pattern. */ - char *cp2; - - /* e.g. file read proc:/$PID/stat */ - cp = strstr(buffer, " proc:/"); - if (cp && simple_strtoul(cp + 7, &cp2, 10) >= 10 && *cp2 == '/') { - *(cp + 7) = '\\'; - *(cp + 8) = '$'; - memmove(cp + 9, cp2, strlen(cp2) + 1); - goto ok; - } - /* e.g. file ioctl pipe:[$INO] $CMD */ - cp = strstr(buffer, " pipe:["); - if (cp && simple_strtoul(cp + 7, &cp2, 10) >= 10 && *cp2 == ']') { - *(cp + 7) = '\\'; - *(cp + 8) = '$'; - memmove(cp + 9, cp2, strlen(cp2) + 1); - goto ok; - } - /* e.g. file ioctl socket:[$INO] $CMD */ - cp = strstr(buffer, " socket:["); - if (cp && simple_strtoul(cp + 9, &cp2, 10) >= 10 && *cp2 == ']') { - *(cp + 9) = '\\'; - *(cp + 10) = '$'; - memmove(cp + 11, cp2, strlen(cp2) + 1); - goto ok; - } - } -ok: + tomoyo_patternize_path(buffer, len, cp); if (realpath) tomoyo_addprintf(buffer, len, " exec.%s", realpath); if (argv0) diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 3a7b0874cf44..5f9ccab26e9a 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -920,7 +920,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, #ifdef CONFIG_MMU /* * This is called at execve() time in order to dig around - * in the argv/environment of the new proceess + * in the argv/environment of the new process * (represented by bprm). */ mmap_read_lock(bprm->mm); diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c index a2705798476f..7e69747b2f77 100644 --- a/security/tomoyo/securityfs_if.c +++ b/security/tomoyo/securityfs_if.c @@ -229,11 +229,11 @@ static void __init tomoyo_create_entry(const char *name, const umode_t mode, } /** - * tomoyo_initerface_init - Initialize /sys/kernel/security/tomoyo/ interface. + * tomoyo_interface_init - Initialize /sys/kernel/security/tomoyo/ interface. * * Returns 0. */ -static int __init tomoyo_initerface_init(void) +static int __init tomoyo_interface_init(void) { struct tomoyo_domain_info *domain; struct dentry *tomoyo_dir; @@ -270,4 +270,4 @@ static int __init tomoyo_initerface_init(void) return 0; } -fs_initcall(tomoyo_initerface_init); +fs_initcall(tomoyo_interface_init); diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 04a92c3d65d4..d6ebcd9db80a 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -549,10 +549,7 @@ static const struct lsm_id tomoyo_lsmid = { .id = LSM_ID_TOMOYO, }; -/* - * tomoyo_security_ops is a "struct security_operations" which is used for - * registering TOMOYO. - */ +/* tomoyo_hooks is used for registering TOMOYO. */ static struct security_hook_list tomoyo_hooks[] __ro_after_init = { LSM_HOOK_INIT(cred_prepare, tomoyo_cred_prepare), LSM_HOOK_INIT(bprm_committed_creds, tomoyo_bprm_committed_creds), diff --git a/sound/core/hrtimer.c b/sound/core/hrtimer.c index 147c1fea4708..e9c60dce59fb 100644 --- a/sound/core/hrtimer.c +++ b/sound/core/hrtimer.c @@ -66,9 +66,8 @@ static int snd_hrtimer_open(struct snd_timer *t) stime = kzalloc(sizeof(*stime), GFP_KERNEL); if (!stime) return -ENOMEM; - hrtimer_init(&stime->hrt, CLOCK_MONOTONIC, HRTIMER_MODE_REL); stime->timer = t; - stime->hrt.function = snd_hrtimer_callback; + hrtimer_setup(&stime->hrt, snd_hrtimer_callback, CLOCK_MONOTONIC, HRTIMER_MODE_REL); t->private_data = stime; return 0; } diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 073b56dc2225..cb66ec42a3f8 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -678,12 +678,18 @@ static int snd_seq_deliver_single_event(struct snd_seq_client *client, dest_port->time_real); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) - if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { - if (snd_seq_ev_is_ump(event)) { + if (snd_seq_ev_is_ump(event)) { + if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { result = snd_seq_deliver_from_ump(client, dest, dest_port, event, atomic, hop); goto __skip; - } else if (snd_seq_client_is_ump(dest)) { + } else if (dest->type == USER_CLIENT && + !snd_seq_client_is_ump(dest)) { + result = 0; // drop the event + goto __skip; + } + } else if (snd_seq_client_is_ump(dest)) { + if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { result = snd_seq_deliver_to_ump(client, dest, dest_port, event, atomic, hop); goto __skip; diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index 8f5df9b3aaaa..c1a3efb633c5 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -457,8 +457,7 @@ static int dummy_hrtimer_create(struct snd_pcm_substream *substream) if (!dpcm) return -ENOMEM; substream->runtime->private_data = dpcm; - hrtimer_init(&dpcm->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); - dpcm->timer.function = dummy_hrtimer_callback; + hrtimer_setup(&dpcm->timer, dummy_hrtimer_callback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); dpcm->substream = substream; atomic_set(&dpcm->running, 0); return 0; diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c index 78c9b1c7590f..e8482c2290c3 100644 --- a/sound/drivers/pcsp/pcsp.c +++ b/sound/drivers/pcsp/pcsp.c @@ -103,8 +103,7 @@ static int snd_card_pcsp_probe(int devnum, struct device *dev) if (devnum != 0) return -EINVAL; - hrtimer_init(&pcsp_chip.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - pcsp_chip.timer.function = pcsp_do_timer; + hrtimer_setup(&pcsp_chip.timer, pcsp_do_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); err = snd_devm_card_new(dev, index, id, THIS_MODULE, 0, &card); if (err < 0) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index f564ec7af194..ce3ae2cba660 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -539,6 +539,11 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = PCI_DEVICE_ID_INTEL_HDA_PTL, }, + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, + .device = PCI_DEVICE_ID_INTEL_HDA_PTL_H, + }, + #endif }; diff --git a/sound/pci/hda/cs35l56_hda_spi.c b/sound/pci/hda/cs35l56_hda_spi.c index d4ee5bb7c486..903578466905 100644 --- a/sound/pci/hda/cs35l56_hda_spi.c +++ b/sound/pci/hda/cs35l56_hda_spi.c @@ -22,6 +22,9 @@ static int cs35l56_hda_spi_probe(struct spi_device *spi) return -ENOMEM; cs35l56->base.dev = &spi->dev; + ret = cs35l56_init_config_for_spi(&cs35l56->base, spi); + if (ret) + return ret; #ifdef CS35L56_WAKE_HOLD_TIME_US cs35l56->base.can_hibernate = true; diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 14763c0f31ad..46a220404999 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2470,7 +2470,9 @@ int snd_hda_create_dig_out_ctls(struct hda_codec *codec, break; id = kctl->id; id.index = spdif_index; - snd_ctl_rename_id(codec->card, &kctl->id, &id); + err = snd_ctl_rename_id(codec->card, &kctl->id, &id); + if (err < 0) + return err; } bus->primary_dig_out_type = HDA_PCM_TYPE_HDMI; } diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 7d7f9aac50a9..67540e037309 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2496,6 +2496,8 @@ static const struct pci_device_id azx_ids[] = { { PCI_DEVICE_DATA(INTEL, HDA_ARL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE) }, /* Panther Lake */ { PCI_DEVICE_DATA(INTEL, HDA_PTL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_LNL) }, + /* Panther Lake-H */ + { PCI_DEVICE_DATA(INTEL, HDA_PTL_H, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_LNL) }, /* Apollolake (Broxton-P) */ { PCI_DEVICE_DATA(INTEL, HDA_APL, AZX_DRIVER_SKL | AZX_DCAPS_INTEL_BROXTON) }, /* Gemini-Lake */ diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 4985e72b9094..34874039ad45 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -1090,6 +1090,7 @@ static const struct hda_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x814f, "HP ZBook 15u G3", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x8174, "HP Spectre x360", CXT_FIXUP_HP_SPECTRE), SND_PCI_QUIRK(0x103c, 0x822e, "HP ProBook 440 G4", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x8231, "HP ProBook 450 G4", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c index 759f48038273..09240138e087 100644 --- a/sound/pci/hda/patch_cs8409-tables.c +++ b/sound/pci/hda/patch_cs8409-tables.c @@ -121,7 +121,7 @@ static const struct cs8409_i2c_param cs42l42_init_reg_seq[] = { { CS42L42_MIXER_CHA_VOL, 0x3F }, { CS42L42_MIXER_CHB_VOL, 0x3F }, { CS42L42_MIXER_ADC_VOL, 0x3f }, - { CS42L42_HP_CTL, 0x03 }, + { CS42L42_HP_CTL, 0x0D }, { CS42L42_MIC_DET_CTL1, 0xB6 }, { CS42L42_TIPSENSE_CTL, 0xC2 }, { CS42L42_HS_CLAMP_DISABLE, 0x01 }, @@ -131,7 +131,7 @@ static const struct cs8409_i2c_param cs42l42_init_reg_seq[] = { { CS42L42_RSENSE_CTL3, 0x00 }, { CS42L42_TSENSE_CTL, 0x80 }, { CS42L42_HS_BIAS_CTL, 0xC0 }, - { CS42L42_PWR_CTL1, 0x02 }, + { CS42L42_PWR_CTL1, 0x02, 10000 }, { CS42L42_ADC_OVFL_INT_MASK, 0xff }, { CS42L42_MIXER_INT_MASK, 0xff }, { CS42L42_SRC_INT_MASK, 0xff }, @@ -315,7 +315,7 @@ static const struct cs8409_i2c_param dolphin_c0_init_reg_seq[] = { { CS42L42_ASP_TX_SZ_EN, 0x01 }, { CS42L42_PWR_CTL1, 0x0A }, { CS42L42_PWR_CTL2, 0x84 }, - { CS42L42_HP_CTL, 0x03 }, + { CS42L42_HP_CTL, 0x0D }, { CS42L42_MIXER_CHA_VOL, 0x3F }, { CS42L42_MIXER_CHB_VOL, 0x3F }, { CS42L42_MIXER_ADC_VOL, 0x3f }, @@ -328,7 +328,7 @@ static const struct cs8409_i2c_param dolphin_c0_init_reg_seq[] = { { CS42L42_RSENSE_CTL3, 0x00 }, { CS42L42_TSENSE_CTL, 0x80 }, { CS42L42_HS_BIAS_CTL, 0xC0 }, - { CS42L42_PWR_CTL1, 0x02 }, + { CS42L42_PWR_CTL1, 0x02, 10000 }, { CS42L42_ADC_OVFL_INT_MASK, 0xff }, { CS42L42_MIXER_INT_MASK, 0xff }, { CS42L42_SRC_INT_MASK, 0xff }, @@ -371,7 +371,7 @@ static const struct cs8409_i2c_param dolphin_c1_init_reg_seq[] = { { CS42L42_ASP_TX_SZ_EN, 0x00 }, { CS42L42_PWR_CTL1, 0x0E }, { CS42L42_PWR_CTL2, 0x84 }, - { CS42L42_HP_CTL, 0x01 }, + { CS42L42_HP_CTL, 0x0D }, { CS42L42_MIXER_CHA_VOL, 0x3F }, { CS42L42_MIXER_CHB_VOL, 0x3F }, { CS42L42_MIXER_ADC_VOL, 0x3f }, @@ -384,7 +384,7 @@ static const struct cs8409_i2c_param dolphin_c1_init_reg_seq[] = { { CS42L42_RSENSE_CTL3, 0x00 }, { CS42L42_TSENSE_CTL, 0x80 }, { CS42L42_HS_BIAS_CTL, 0xC0 }, - { CS42L42_PWR_CTL1, 0x06 }, + { CS42L42_PWR_CTL1, 0x06, 10000 }, { CS42L42_ADC_OVFL_INT_MASK, 0xff }, { CS42L42_MIXER_INT_MASK, 0xff }, { CS42L42_SRC_INT_MASK, 0xff }, diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c index 614327218634..e50006757a2c 100644 --- a/sound/pci/hda/patch_cs8409.c +++ b/sound/pci/hda/patch_cs8409.c @@ -346,6 +346,11 @@ static int cs8409_i2c_bulk_write(struct sub_codec *scodec, const struct cs8409_i if (cs8409_i2c_wait_complete(codec) < 0) goto error; + /* Certain use cases may require a delay + * after a write operation before proceeding. + */ + if (seq[i].delay) + fsleep(seq[i].delay); } mutex_unlock(&spec->i2c_mux); @@ -876,7 +881,7 @@ static void cs42l42_resume(struct sub_codec *cs42l42) { CS42L42_DET_INT_STATUS2, 0x00 }, { CS42L42_TSRS_PLUG_STATUS, 0x00 }, }; - int fsv_old, fsv_new; + unsigned int fsv; /* Bring CS42L42 out of Reset */ spec->gpio_data = snd_hda_codec_read(codec, CS8409_PIN_AFG, 0, AC_VERB_GET_GPIO_DATA, 0); @@ -888,18 +893,19 @@ static void cs42l42_resume(struct sub_codec *cs42l42) /* Initialize CS42L42 companion codec */ cs8409_i2c_bulk_write(cs42l42, cs42l42->init_seq, cs42l42->init_seq_num); - msleep(CS42L42_INIT_TIMEOUT_MS); /* Clear interrupts, by reading interrupt status registers */ cs8409_i2c_bulk_read(cs42l42, irq_regs, ARRAY_SIZE(irq_regs)); - fsv_old = cs8409_i2c_read(cs42l42, CS42L42_HP_CTL); - if (cs42l42->full_scale_vol == CS42L42_FULL_SCALE_VOL_0DB) - fsv_new = fsv_old & ~CS42L42_FULL_SCALE_VOL_MASK; - else - fsv_new = fsv_old & CS42L42_FULL_SCALE_VOL_MASK; - if (fsv_new != fsv_old) - cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv_new); + fsv = cs8409_i2c_read(cs42l42, CS42L42_HP_CTL); + if (cs42l42->full_scale_vol) { + // Set the full scale volume bit + fsv |= CS42L42_FULL_SCALE_VOL_MASK; + cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv); + } + // Unmute analog channels A and B + fsv = (fsv & ~CS42L42_ANA_MUTE_AB); + cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv); /* we have to explicitly allow unsol event handling even during the * resume phase so that the jack event is processed properly @@ -920,7 +926,7 @@ static void cs42l42_suspend(struct sub_codec *cs42l42) { CS42L42_MIXER_CHA_VOL, 0x3F }, { CS42L42_MIXER_ADC_VOL, 0x3F }, { CS42L42_MIXER_CHB_VOL, 0x3F }, - { CS42L42_HP_CTL, 0x0F }, + { CS42L42_HP_CTL, 0x0D }, { CS42L42_ASP_RX_DAI0_EN, 0x00 }, { CS42L42_ASP_CLK_CFG, 0x00 }, { CS42L42_PWR_CTL1, 0xFE }, diff --git a/sound/pci/hda/patch_cs8409.h b/sound/pci/hda/patch_cs8409.h index 5e48115caf09..e4bd2e12110b 100644 --- a/sound/pci/hda/patch_cs8409.h +++ b/sound/pci/hda/patch_cs8409.h @@ -229,10 +229,10 @@ enum cs8409_coefficient_index_registers { #define CS42L42_I2C_SLEEP_US (2000) #define CS42L42_PDN_TIMEOUT_US (250000) #define CS42L42_PDN_SLEEP_US (2000) -#define CS42L42_INIT_TIMEOUT_MS (45) +#define CS42L42_ANA_MUTE_AB (0x0C) #define CS42L42_FULL_SCALE_VOL_MASK (2) -#define CS42L42_FULL_SCALE_VOL_0DB (1) -#define CS42L42_FULL_SCALE_VOL_MINUS6DB (0) +#define CS42L42_FULL_SCALE_VOL_0DB (0) +#define CS42L42_FULL_SCALE_VOL_MINUS6DB (1) /* Dell BULLSEYE / WARLOCK / CYBORG Specific Definitions */ @@ -290,6 +290,7 @@ enum { struct cs8409_i2c_param { unsigned int addr; unsigned int value; + unsigned int delay; }; struct cs8409_cir_param { diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8192be394d0d..c735f630ecb5 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3788,6 +3788,7 @@ static void alc225_init(struct hda_codec *codec) AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE); msleep(75); + alc_update_coef_idx(codec, 0x4a, 3 << 10, 0); alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x4); /* Hight power */ } } @@ -10622,6 +10623,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1433, "ASUS GX650PY/PZ/PV/PU/PYV/PZV/PIV/PVV", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), + SND_PCI_QUIRK(0x1043, 0x1460, "Asus VivoBook 15", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1463, "Asus GA402X/GA402N", ALC285_FIXUP_ASUS_I2C_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1473, "ASUS GU604VI/VC/VE/VG/VJ/VQ/VU/VV/VY/VZ", ALC285_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1483, "ASUS GU603VQ/VU/VV/VJ/VI", ALC285_FIXUP_ASUS_HEADSET_MIC), @@ -10655,7 +10657,6 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW), - SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1a63, "ASUS UX3405MA", ALC245_FIXUP_CS35L41_SPI_2), SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2), @@ -11045,6 +11046,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1f66, 0x0105, "Ayaneo Portable Game Player", ALC287_FIXUP_CS35L41_I2C_2), + SND_PCI_QUIRK(0x2014, 0x800a, "Positivo ARN50", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0214, "VAIO VJFE-CL", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x2782, 0x0228, "Infinix ZERO BOOK 13", ALC269VB_FIXUP_INFINIX_ZERO_BOOK_13), SND_PCI_QUIRK(0x2782, 0x0232, "CHUWI CoreBook XPro", ALC269VB_FIXUP_CHUWI_COREBOOK_XPRO), diff --git a/sound/pci/hda/tas2781_hda_spi.c b/sound/pci/hda/tas2781_hda_spi.c index a42fa990e7b9..04db80af53c0 100644 --- a/sound/pci/hda/tas2781_hda_spi.c +++ b/sound/pci/hda/tas2781_hda_spi.c @@ -912,7 +912,7 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) struct tasdevice_priv *tas_priv = context; struct tas2781_hda *tas_hda = dev_get_drvdata(tas_priv->dev); struct hda_codec *codec = tas_priv->codec; - int i, j, ret; + int i, j, ret, val; pm_runtime_get_sync(tas_priv->dev); guard(mutex)(&tas_priv->codec_lock); @@ -981,13 +981,16 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) /* Perform AMP reset before firmware download. */ tas_priv->rcabin.profile_cfg_id = TAS2781_PRE_POST_RESET_CFG; - tasdevice_spi_tuning_switch(tas_priv, 0); tas2781_spi_reset(tas_priv); tas_priv->rcabin.profile_cfg_id = 0; - tasdevice_spi_tuning_switch(tas_priv, 1); tas_priv->fw_state = TASDEVICE_DSP_FW_ALL_OK; - ret = tasdevice_spi_prmg_load(tas_priv, 0); + ret = tasdevice_spi_dev_read(tas_priv, TAS2781_REG_CLK_CONFIG, &val); + if (ret < 0) + goto out; + + if (val == TAS2781_REG_CLK_CONFIG_RESET) + ret = tasdevice_spi_prmg_load(tas_priv, 0); if (ret < 0) { dev_err(tas_priv->dev, "FW download failed = %d\n", ret); goto out; @@ -1001,7 +1004,6 @@ static void tasdev_fw_ready(const struct firmware *fmw, void *context) * If calibrated data occurs error, dsp will still works with default * calibrated data inside algo. */ - tas_priv->save_calibration(tas_priv); out: if (fmw) @@ -1160,7 +1162,8 @@ static int tas2781_runtime_suspend(struct device *dev) guard(mutex)(&tas_hda->priv->codec_lock); - tasdevice_spi_tuning_switch(tas_hda->priv, 1); + if (tas_hda->priv->playback_started) + tasdevice_spi_tuning_switch(tas_hda->priv, 1); tas_hda->priv->cur_book = -1; tas_hda->priv->cur_conf = -1; @@ -1174,7 +1177,8 @@ static int tas2781_runtime_resume(struct device *dev) guard(mutex)(&tas_hda->priv->codec_lock); - tasdevice_spi_tuning_switch(tas_hda->priv, 0); + if (tas_hda->priv->playback_started) + tasdevice_spi_tuning_switch(tas_hda->priv, 0); return 0; } @@ -1189,12 +1193,9 @@ static int tas2781_system_suspend(struct device *dev) return ret; /* Shutdown chip before system suspend */ - tasdevice_spi_tuning_switch(tas_hda->priv, 1); - tas2781_spi_reset(tas_hda->priv); - /* - * Reset GPIO may be shared, so cannot reset here. - * However beyond this point, amps may be powered down. - */ + if (tas_hda->priv->playback_started) + tasdevice_spi_tuning_switch(tas_hda->priv, 1); + return 0; } diff --git a/sound/pci/hda/tas2781_spi_fwlib.c b/sound/pci/hda/tas2781_spi_fwlib.c index 0e2acbc3c900..131d9a77d140 100644 --- a/sound/pci/hda/tas2781_spi_fwlib.c +++ b/sound/pci/hda/tas2781_spi_fwlib.c @@ -2,7 +2,7 @@ // // TAS2781 HDA SPI driver // -// Copyright 2024 Texas Instruments, Inc. +// Copyright 2024-2025 Texas Instruments, Inc. // // Author: Baojun Xu <baojun.xu@ti.com> @@ -771,19 +771,19 @@ static int tasdevice_process_block(void *context, unsigned char *data, switch (subblk_typ) { case TASDEVICE_CMD_SING_W: subblk_offset = tasdevice_single_byte_wr(tas_priv, - dev_idx & 0x4f, data, sublocksize); + dev_idx & 0x3f, data, sublocksize); break; case TASDEVICE_CMD_BURST: subblk_offset = tasdevice_burst_wr(tas_priv, - dev_idx & 0x4f, data, sublocksize); + dev_idx & 0x3f, data, sublocksize); break; case TASDEVICE_CMD_DELAY: subblk_offset = tasdevice_delay(tas_priv, - dev_idx & 0x4f, data, sublocksize); + dev_idx & 0x3f, data, sublocksize); break; case TASDEVICE_CMD_FIELD_W: subblk_offset = tasdevice_field_wr(tas_priv, - dev_idx & 0x4f, data, sublocksize); + dev_idx & 0x3f, data, sublocksize); break; default: subblk_offset = 2; diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c index 3f5422145c5e..84a4b17a0cc2 100644 --- a/sound/sh/sh_dac_audio.c +++ b/sound/sh/sh_dac_audio.c @@ -312,8 +312,7 @@ static int snd_sh_dac_create(struct snd_card *card, chip->card = card; - hrtimer_init(&chip->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - chip->hrtimer.function = sh_dac_audio_timer; + hrtimer_setup(&chip->hrtimer, sh_dac_audio_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); dac_audio_reset(chip); chip->rate = 8000; diff --git a/sound/soc/codecs/arizona.c b/sound/soc/codecs/arizona.c index 402b9a2ff024..68cdb1027d0c 100644 --- a/sound/soc/codecs/arizona.c +++ b/sound/soc/codecs/arizona.c @@ -967,7 +967,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, case ARIZONA_OUT3L_ENA_SHIFT: case ARIZONA_OUT3R_ENA_SHIFT: priv->out_up_pending++; - priv->out_up_delay += 17; + priv->out_up_delay += 17000; break; case ARIZONA_OUT4L_ENA_SHIFT: case ARIZONA_OUT4R_ENA_SHIFT: @@ -977,7 +977,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, case WM8997: break; default: - priv->out_up_delay += 10; + priv->out_up_delay += 10000; break; } break; @@ -999,7 +999,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, if (!priv->out_up_pending && priv->out_up_delay) { dev_dbg(component->dev, "Power up delay: %d\n", priv->out_up_delay); - msleep(priv->out_up_delay); + fsleep(priv->out_up_delay); priv->out_up_delay = 0; } break; @@ -1017,7 +1017,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, case ARIZONA_OUT3L_ENA_SHIFT: case ARIZONA_OUT3R_ENA_SHIFT: priv->out_down_pending++; - priv->out_down_delay++; + priv->out_down_delay += 1000; break; case ARIZONA_OUT4L_ENA_SHIFT: case ARIZONA_OUT4R_ENA_SHIFT: @@ -1028,10 +1028,10 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, break; case WM8998: case WM1814: - priv->out_down_delay += 5; + priv->out_down_delay += 5000; break; default: - priv->out_down_delay++; + priv->out_down_delay += 1000; break; } break; @@ -1053,7 +1053,7 @@ int arizona_out_ev(struct snd_soc_dapm_widget *w, if (!priv->out_down_pending && priv->out_down_delay) { dev_dbg(component->dev, "Power down delay: %d\n", priv->out_down_delay); - msleep(priv->out_down_delay); + fsleep(priv->out_down_delay); priv->out_down_delay = 0; } break; diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index 07a5cab35fe1..ff4134bee858 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -1148,21 +1148,31 @@ err_dsp: return ret; } +#ifdef CONFIG_ACPI static int cs35l41_acpi_get_name(struct cs35l41_private *cs35l41) { - acpi_handle handle = ACPI_HANDLE(cs35l41->dev); + struct acpi_device *adev = ACPI_COMPANION(cs35l41->dev); + acpi_handle handle = acpi_device_handle(adev); + const char *hid; const char *sub; - /* If there is no ACPI_HANDLE, there is no ACPI for this system, return 0 */ - if (!handle) + /* If there is no acpi_device, there is no ACPI for this system, return 0 */ + if (!adev) return 0; sub = acpi_get_subsystem_id(handle); if (IS_ERR(sub)) { - /* If bad ACPI, return 0 and fallback to legacy firmware path, otherwise fail */ - if (PTR_ERR(sub) == -ENODATA) - return 0; - else + /* If no _SUB, fallback to _HID, otherwise fail */ + if (PTR_ERR(sub) == -ENODATA) { + hid = acpi_device_hid(adev); + /* If dummy hid, return 0 and fallback to legacy firmware path */ + if (!strcmp(hid, "device")) + return 0; + sub = kstrdup(hid, GFP_KERNEL); + if (!sub) + sub = ERR_PTR(-ENOMEM); + + } else return PTR_ERR(sub); } @@ -1171,6 +1181,12 @@ static int cs35l41_acpi_get_name(struct cs35l41_private *cs35l41) return 0; } +#else +static int cs35l41_acpi_get_name(struct cs35l41_private *cs35l41) +{ + return 0; +} +#endif /* CONFIG_ACPI */ int cs35l41_probe(struct cs35l41_private *cs35l41, const struct cs35l41_hw_cfg *hw_cfg) { diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index e0ed4fc11155..e28bfefa72f3 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -10,6 +10,7 @@ #include <linux/gpio/consumer.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> +#include <linux/spi/spi.h> #include <linux/types.h> #include <sound/cs-amp-lib.h> @@ -303,6 +304,79 @@ void cs35l56_wait_min_reset_pulse(void) } EXPORT_SYMBOL_NS_GPL(cs35l56_wait_min_reset_pulse, "SND_SOC_CS35L56_SHARED"); +static const struct { + u32 addr; + u32 value; +} cs35l56_spi_system_reset_stages[] = { + { .addr = CS35L56_DSP_VIRTUAL1_MBOX_1, .value = CS35L56_MBOX_CMD_SYSTEM_RESET }, + /* The next write is necessary to delimit the soft reset */ + { .addr = CS35L56_DSP_MBOX_1_RAW, .value = CS35L56_MBOX_CMD_PING }, +}; + +static void cs35l56_spi_issue_bus_locked_reset(struct cs35l56_base *cs35l56_base, + struct spi_device *spi) +{ + struct cs35l56_spi_payload *buf = cs35l56_base->spi_payload_buf; + struct spi_transfer t = { + .tx_buf = buf, + .len = sizeof(*buf), + }; + struct spi_message m; + int i, ret; + + for (i = 0; i < ARRAY_SIZE(cs35l56_spi_system_reset_stages); i++) { + buf->addr = cpu_to_be32(cs35l56_spi_system_reset_stages[i].addr); + buf->value = cpu_to_be32(cs35l56_spi_system_reset_stages[i].value); + spi_message_init_with_transfers(&m, &t, 1); + ret = spi_sync_locked(spi, &m); + if (ret) + dev_warn(cs35l56_base->dev, "spi_sync failed: %d\n", ret); + + usleep_range(CS35L56_SPI_RESET_TO_PORT_READY_US, + 2 * CS35L56_SPI_RESET_TO_PORT_READY_US); + } +} + +static void cs35l56_spi_system_reset(struct cs35l56_base *cs35l56_base) +{ + struct spi_device *spi = to_spi_device(cs35l56_base->dev); + unsigned int val; + int read_ret, ret; + + /* + * There must not be any other SPI bus activity while the amp is + * soft-resetting. + */ + ret = spi_bus_lock(spi->controller); + if (ret) { + dev_warn(cs35l56_base->dev, "spi_bus_lock failed: %d\n", ret); + return; + } + + cs35l56_spi_issue_bus_locked_reset(cs35l56_base, spi); + spi_bus_unlock(spi->controller); + + /* + * Check firmware boot by testing for a response in MBOX_2. + * HALO_STATE cannot be trusted yet because the reset sequence + * can leave it with stale state. But MBOX is reset. + * The regmap must remain in cache-only until the chip has + * booted, so use a bypassed read. + */ + ret = read_poll_timeout(regmap_read_bypassed, read_ret, + (val > 0) && (val < 0xffffffff), + CS35L56_HALO_STATE_POLL_US, + CS35L56_HALO_STATE_TIMEOUT_US, + false, + cs35l56_base->regmap, + CS35L56_DSP_VIRTUAL1_MBOX_2, + &val); + if (ret) { + dev_err(cs35l56_base->dev, "SPI reboot timed out(%d): MBOX2=%#x\n", + read_ret, val); + } +} + static const struct reg_sequence cs35l56_system_reset_seq[] = { REG_SEQ0(CS35L56_DSP1_HALO_STATE, 0), REG_SEQ0(CS35L56_DSP_VIRTUAL1_MBOX_1, CS35L56_MBOX_CMD_SYSTEM_RESET), @@ -315,6 +389,12 @@ void cs35l56_system_reset(struct cs35l56_base *cs35l56_base, bool is_soundwire) * accesses other than the controlled system reset sequence below. */ regcache_cache_only(cs35l56_base->regmap, true); + + if (cs35l56_is_spi(cs35l56_base)) { + cs35l56_spi_system_reset(cs35l56_base); + return; + } + regmap_multi_reg_write_bypassed(cs35l56_base->regmap, cs35l56_system_reset_seq, ARRAY_SIZE(cs35l56_system_reset_seq)); diff --git a/sound/soc/codecs/cs35l56-spi.c b/sound/soc/codecs/cs35l56-spi.c index c101134e8532..ca6c03a8766d 100644 --- a/sound/soc/codecs/cs35l56-spi.c +++ b/sound/soc/codecs/cs35l56-spi.c @@ -33,6 +33,9 @@ static int cs35l56_spi_probe(struct spi_device *spi) cs35l56->base.dev = &spi->dev; cs35l56->base.can_hibernate = true; + ret = cs35l56_init_config_for_spi(&cs35l56->base, spi); + if (ret) + return ret; ret = cs35l56_common_probe(cs35l56); if (ret != 0) diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index f3c97da798dc..76159c45e6b5 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -233,7 +233,6 @@ static const struct snd_kcontrol_new es8328_right_line_controls = /* Left Mixer */ static const struct snd_kcontrol_new es8328_left_mixer_controls[] = { - SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL17, 7, 1, 0), SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL17, 6, 1, 0), SOC_DAPM_SINGLE("Right Playback Switch", ES8328_DACCONTROL18, 7, 1, 0), SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL18, 6, 1, 0), @@ -243,7 +242,6 @@ static const struct snd_kcontrol_new es8328_left_mixer_controls[] = { static const struct snd_kcontrol_new es8328_right_mixer_controls[] = { SOC_DAPM_SINGLE("Left Playback Switch", ES8328_DACCONTROL19, 7, 1, 0), SOC_DAPM_SINGLE("Left Bypass Switch", ES8328_DACCONTROL19, 6, 1, 0), - SOC_DAPM_SINGLE("Playback Switch", ES8328_DACCONTROL20, 7, 1, 0), SOC_DAPM_SINGLE("Right Bypass Switch", ES8328_DACCONTROL20, 6, 1, 0), }; @@ -336,10 +334,10 @@ static const struct snd_soc_dapm_widget es8328_dapm_widgets[] = { SND_SOC_DAPM_DAC("Left DAC", "Left Playback", ES8328_DACPOWER, ES8328_DACPOWER_LDAC_OFF, 1), - SND_SOC_DAPM_MIXER("Left Mixer", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_MIXER("Left Mixer", ES8328_DACCONTROL17, 7, 0, &es8328_left_mixer_controls[0], ARRAY_SIZE(es8328_left_mixer_controls)), - SND_SOC_DAPM_MIXER("Right Mixer", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_MIXER("Right Mixer", ES8328_DACCONTROL20, 7, 0, &es8328_right_mixer_controls[0], ARRAY_SIZE(es8328_right_mixer_controls)), @@ -418,19 +416,14 @@ static const struct snd_soc_dapm_route es8328_dapm_routes[] = { { "Right Line Mux", "PGA", "Right PGA Mux" }, { "Right Line Mux", "Differential", "Differential Mux" }, - { "Left Out 1", NULL, "Left DAC" }, - { "Right Out 1", NULL, "Right DAC" }, - { "Left Out 2", NULL, "Left DAC" }, - { "Right Out 2", NULL, "Right DAC" }, - - { "Left Mixer", "Playback Switch", "Left DAC" }, + { "Left Mixer", NULL, "Left DAC" }, { "Left Mixer", "Left Bypass Switch", "Left Line Mux" }, { "Left Mixer", "Right Playback Switch", "Right DAC" }, { "Left Mixer", "Right Bypass Switch", "Right Line Mux" }, { "Right Mixer", "Left Playback Switch", "Left DAC" }, { "Right Mixer", "Left Bypass Switch", "Left Line Mux" }, - { "Right Mixer", "Playback Switch", "Right DAC" }, + { "Right Mixer", NULL, "Right DAC" }, { "Right Mixer", "Right Bypass Switch", "Right Line Mux" }, { "DAC DIG", NULL, "DAC STM" }, diff --git a/sound/soc/codecs/madera.c b/sound/soc/codecs/madera.c index a840a2eb92b9..af109761f359 100644 --- a/sound/soc/codecs/madera.c +++ b/sound/soc/codecs/madera.c @@ -2323,10 +2323,10 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case CS42L92: case CS47L92: case CS47L93: - out_up_delay = 6; + out_up_delay = 6000; break; default: - out_up_delay = 17; + out_up_delay = 17000; break; } @@ -2357,7 +2357,7 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case MADERA_OUT3R_ENA_SHIFT: priv->out_up_pending--; if (!priv->out_up_pending) { - msleep(priv->out_up_delay); + fsleep(priv->out_up_delay); priv->out_up_delay = 0; } break; @@ -2376,7 +2376,7 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case MADERA_OUT3L_ENA_SHIFT: case MADERA_OUT3R_ENA_SHIFT: priv->out_down_pending++; - priv->out_down_delay++; + priv->out_down_delay += 1000; break; default: break; @@ -2393,7 +2393,7 @@ int madera_out_ev(struct snd_soc_dapm_widget *w, case MADERA_OUT3R_ENA_SHIFT: priv->out_down_pending--; if (!priv->out_down_pending) { - msleep(priv->out_down_delay); + fsleep(priv->out_down_delay); priv->out_down_delay = 0; } break; diff --git a/sound/soc/codecs/tas2764.c b/sound/soc/codecs/tas2764.c index d482cd194c08..58315eab492a 100644 --- a/sound/soc/codecs/tas2764.c +++ b/sound/soc/codecs/tas2764.c @@ -365,7 +365,7 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) { struct snd_soc_component *component = dai->component; struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component); - u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0; + u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0, asi_cfg_4 = 0; int ret; switch (fmt & SND_SOC_DAIFMT_INV_MASK) { @@ -374,12 +374,14 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) fallthrough; case SND_SOC_DAIFMT_NB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING; + asi_cfg_4 = TAS2764_TDM_CFG4_TX_FALLING; break; case SND_SOC_DAIFMT_IB_IF: asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; fallthrough; case SND_SOC_DAIFMT_IB_NF: asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING; + asi_cfg_4 = TAS2764_TDM_CFG4_TX_RISING; break; } @@ -389,6 +391,12 @@ static int tas2764_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) if (ret < 0) return ret; + ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG4, + TAS2764_TDM_CFG4_TX_MASK, + asi_cfg_4); + if (ret < 0) + return ret; + switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START; diff --git a/sound/soc/codecs/tas2764.h b/sound/soc/codecs/tas2764.h index 168af772a898..9490f2686e38 100644 --- a/sound/soc/codecs/tas2764.h +++ b/sound/soc/codecs/tas2764.h @@ -25,7 +25,7 @@ /* Power Control */ #define TAS2764_PWR_CTRL TAS2764_REG(0X0, 0x02) -#define TAS2764_PWR_CTRL_MASK GENMASK(1, 0) +#define TAS2764_PWR_CTRL_MASK GENMASK(2, 0) #define TAS2764_PWR_CTRL_ACTIVE 0x0 #define TAS2764_PWR_CTRL_MUTE BIT(0) #define TAS2764_PWR_CTRL_SHUTDOWN BIT(1) @@ -79,6 +79,12 @@ #define TAS2764_TDM_CFG3_RXS_SHIFT 0x4 #define TAS2764_TDM_CFG3_MASK GENMASK(3, 0) +/* TDM Configuration Reg4 */ +#define TAS2764_TDM_CFG4 TAS2764_REG(0X0, 0x0d) +#define TAS2764_TDM_CFG4_TX_MASK BIT(0) +#define TAS2764_TDM_CFG4_TX_RISING 0x0 +#define TAS2764_TDM_CFG4_TX_FALLING BIT(0) + /* TDM Configuration Reg5 */ #define TAS2764_TDM_CFG5 TAS2764_REG(0X0, 0x0e) #define TAS2764_TDM_CFG5_VSNS_MASK BIT(6) diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index 9f93b230652a..863c3f672ba9 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -506,7 +506,7 @@ static int tas2770_codec_probe(struct snd_soc_component *component) } static DECLARE_TLV_DB_SCALE(tas2770_digital_tlv, 1100, 50, 0); -static DECLARE_TLV_DB_SCALE(tas2770_playback_volume, -12750, 50, 0); +static DECLARE_TLV_DB_SCALE(tas2770_playback_volume, -10050, 50, 0); static const struct snd_kcontrol_new tas2770_snd_controls[] = { SOC_SINGLE_TLV("Speaker Playback Volume", TAS2770_PLAY_CFG_REG2, diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index a730ab6ad4e3..90c5b2e74d12 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -2,7 +2,7 @@ // // ALSA SoC Texas Instruments TAS2563/TAS2781 Audio Smart Amplifier // -// Copyright (C) 2022 - 2024 Texas Instruments Incorporated +// Copyright (C) 2022 - 2025 Texas Instruments Incorporated // https://www.ti.com // // The TAS2563/TAS2781 driver implements a flexible and configurable @@ -1260,8 +1260,6 @@ static int tasdevice_create_cali_ctrls(struct tasdevice_priv *priv) (cali_data->cali_dat_sz_per_dev + 1) + 1 + 15 + 1; priv->cali_data.total_sz = priv->ndev * (cali_data->cali_dat_sz_per_dev + 1); - priv->cali_data.data = devm_kzalloc(priv->dev, - ext_cali_data->max, GFP_KERNEL); cali_ctrls[i].name = cali_name; cali_ctrls[i].iface = SNDRV_CTL_ELEM_IFACE_MIXER; cali_ctrls[i].info = snd_soc_bytes_info_ext; diff --git a/sound/soc/codecs/wm5110.c b/sound/soc/codecs/wm5110.c index 502196253d42..64eee0d2347d 100644 --- a/sound/soc/codecs/wm5110.c +++ b/sound/soc/codecs/wm5110.c @@ -302,7 +302,7 @@ static int wm5110_hp_pre_enable(struct snd_soc_dapm_widget *w) } else { wseq = wm5110_no_dre_left_enable; nregs = ARRAY_SIZE(wm5110_no_dre_left_enable); - priv->out_up_delay += 10; + priv->out_up_delay += 10000; } break; case ARIZONA_OUT1R_ENA_SHIFT: @@ -312,7 +312,7 @@ static int wm5110_hp_pre_enable(struct snd_soc_dapm_widget *w) } else { wseq = wm5110_no_dre_right_enable; nregs = ARRAY_SIZE(wm5110_no_dre_right_enable); - priv->out_up_delay += 10; + priv->out_up_delay += 10000; } break; default: @@ -338,7 +338,7 @@ static int wm5110_hp_pre_disable(struct snd_soc_dapm_widget *w) snd_soc_component_update_bits(component, ARIZONA_SPARE_TRIGGERS, ARIZONA_WS_TRG1, 0); - priv->out_down_delay += 27; + priv->out_down_delay += 27000; } break; case ARIZONA_OUT1R_ENA_SHIFT: @@ -350,7 +350,7 @@ static int wm5110_hp_pre_disable(struct snd_soc_dapm_widget *w) snd_soc_component_update_bits(component, ARIZONA_SPARE_TRIGGERS, ARIZONA_WS_TRG2, 0); - priv->out_down_delay += 27; + priv->out_down_delay += 27000; } break; default: diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 1075598a6647..fa4136683392 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -183,6 +183,8 @@ static int micfil_set_quality(struct fsl_micfil *micfil) case QUALITY_VLOW2: qsel = MICFIL_QSEL_VLOW2_QUALITY; break; + default: + return -EINVAL; } return regmap_update_bits(micfil->regmap, REG_MICFIL_CTRL2, diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c index c4eb87c5d39e..9f33dd11d47f 100644 --- a/sound/soc/fsl/fsl_sai.c +++ b/sound/soc/fsl/fsl_sai.c @@ -994,10 +994,10 @@ static struct snd_soc_dai_driver fsl_sai_dai_template[] = { { .name = "sai-tx", .playback = { - .stream_name = "CPU-Playback", + .stream_name = "SAI-Playback", .channels_min = 1, .channels_max = 32, - .rate_min = 8000, + .rate_min = 8000, .rate_max = 2822400, .rates = SNDRV_PCM_RATE_KNOT, .formats = FSL_SAI_FORMATS, @@ -1007,7 +1007,7 @@ static struct snd_soc_dai_driver fsl_sai_dai_template[] = { { .name = "sai-rx", .capture = { - .stream_name = "CPU-Capture", + .stream_name = "SAI-Capture", .channels_min = 1, .channels_max = 32, .rate_min = 8000, diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c index 231400661c90..dac5d4ddacd6 100644 --- a/sound/soc/fsl/imx-audmix.c +++ b/sound/soc/fsl/imx-audmix.c @@ -23,7 +23,6 @@ struct imx_audmix { struct snd_soc_card card; struct platform_device *audmix_pdev; struct platform_device *out_pdev; - struct clk *cpu_mclk; int num_dai; struct snd_soc_dai_link *dai; int num_dai_conf; @@ -32,34 +31,11 @@ struct imx_audmix { struct snd_soc_dapm_route *dapm_routes; }; -static const u32 imx_audmix_rates[] = { - 8000, 12000, 16000, 24000, 32000, 48000, 64000, 96000, -}; - -static const struct snd_pcm_hw_constraint_list imx_audmix_rate_constraints = { - .count = ARRAY_SIZE(imx_audmix_rates), - .list = imx_audmix_rates, -}; - static int imx_audmix_fe_startup(struct snd_pcm_substream *substream) { - struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); - struct imx_audmix *priv = snd_soc_card_get_drvdata(rtd->card); struct snd_pcm_runtime *runtime = substream->runtime; - struct device *dev = rtd->card->dev; - unsigned long clk_rate = clk_get_rate(priv->cpu_mclk); int ret; - if (clk_rate % 24576000 == 0) { - ret = snd_pcm_hw_constraint_list(runtime, 0, - SNDRV_PCM_HW_PARAM_RATE, - &imx_audmix_rate_constraints); - if (ret < 0) - return ret; - } else { - dev_warn(dev, "mclk may be not supported %lu\n", clk_rate); - } - ret = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_CHANNELS, 1, 8); if (ret < 0) @@ -143,8 +119,8 @@ static const struct snd_soc_ops imx_audmix_be_ops = { static const char *name[][3] = { {"HiFi-AUDMIX-FE-0", "HiFi-AUDMIX-FE-1", "HiFi-AUDMIX-FE-2"}, {"sai-tx", "sai-tx", "sai-rx"}, - {"AUDMIX-Playback-0", "AUDMIX-Playback-1", "CPU-Capture"}, - {"CPU-Playback", "CPU-Playback", "AUDMIX-Capture-0"}, + {"AUDMIX-Playback-0", "AUDMIX-Playback-1", "SAI-Capture"}, + {"SAI-Playback", "SAI-Playback", "AUDMIX-Capture-0"}, }; static int imx_audmix_probe(struct platform_device *pdev) @@ -323,13 +299,6 @@ static int imx_audmix_probe(struct platform_device *pdev) } put_device(&cpu_pdev->dev); - priv->cpu_mclk = devm_clk_get(&cpu_pdev->dev, "mclk1"); - if (IS_ERR(priv->cpu_mclk)) { - ret = PTR_ERR(priv->cpu_mclk); - dev_err(&cpu_pdev->dev, "failed to get DAI mclk1: %d\n", ret); - return ret; - } - priv->audmix_pdev = audmix_pdev; priv->out_pdev = cpu_pdev; diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index dd414634b4ac..c2445c5ccd84 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -1092,6 +1092,7 @@ int graph_util_parse_dai(struct device *dev, struct device_node *ep, args.np = ep; dai = snd_soc_get_dai_via_args(&args); if (dai) { + dlc->of_node = node; dlc->dai_name = snd_soc_dai_name_get(dai); dlc->dai_args = snd_soc_copy_dai_args(dev, &args); if (!dlc->dai_args) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index b0d35fda7b17..c13064c77726 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -13,6 +13,7 @@ #include <linux/soundwire/sdw.h> #include <linux/soundwire/sdw_type.h> #include <linux/soundwire/sdw_intel.h> +#include <sound/core.h> #include <sound/soc-acpi.h> #include "sof_sdw_common.h" #include "../../codecs/rt711.h" @@ -748,9 +749,36 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { }, .driver_data = (void *)(SOC_SDW_PCH_DMIC), }, + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Google"), + DMI_MATCH(DMI_PRODUCT_NAME, "Fatcat"), + }, + .driver_data = (void *)(SOC_SDW_PCH_DMIC | + SOF_BT_OFFLOAD_SSP(2) | + SOF_SSP_BT_OFFLOAD_PRESENT), + }, + {} +}; + +static const struct snd_pci_quirk sof_sdw_ssid_quirk_table[] = { + SND_PCI_QUIRK(0x1043, 0x1e13, "ASUS Zenbook S14", SOC_SDW_CODEC_MIC), {} }; +static void sof_sdw_check_ssid_quirk(const struct snd_soc_acpi_mach *mach) +{ + const struct snd_pci_quirk *quirk_entry; + + quirk_entry = snd_pci_quirk_lookup_id(mach->mach_params.subsystem_vendor, + mach->mach_params.subsystem_device, + sof_sdw_ssid_quirk_table); + + if (quirk_entry) + sof_sdw_quirk = quirk_entry->value; +} + static struct snd_soc_dai_link_component platform_component[] = { { /* name might be overridden during probe */ @@ -775,7 +803,9 @@ static int create_sdw_dailink(struct snd_soc_card *card, int *be_id, struct snd_soc_codec_conf **codec_conf) { struct device *dev = card->dev; + struct snd_soc_acpi_mach *mach = dev_get_platdata(card->dev); struct asoc_sdw_mc_private *ctx = snd_soc_card_get_drvdata(card); + struct snd_soc_acpi_mach_params *mach_params = &mach->mach_params; struct intel_mc_ctx *intel_ctx = (struct intel_mc_ctx *)ctx->private; struct asoc_sdw_endpoint *sof_end; int stream; @@ -872,6 +902,11 @@ static int create_sdw_dailink(struct snd_soc_card *card, codecs[j].name = sof_end->codec_name; codecs[j].dai_name = sof_end->dai_info->dai_name; + if (sof_end->dai_info->dai_type == SOC_SDW_DAI_TYPE_MIC && + mach_params->dmic_num > 0) { + dev_warn(dev, + "Both SDW DMIC and PCH DMIC are present, if incorrect, please set kernel params snd_sof_intel_hda_generic dmic_num=0 to disable PCH DMIC\n"); + } j++; } @@ -1278,6 +1313,13 @@ static int mc_probe(struct platform_device *pdev) snd_soc_card_set_drvdata(card, ctx); + if (mach->mach_params.subsystem_id_set) { + snd_soc_card_set_pci_ssid(card, + mach->mach_params.subsystem_vendor, + mach->mach_params.subsystem_device); + sof_sdw_check_ssid_quirk(mach); + } + dmi_check_system(sof_sdw_quirk_table); if (quirk_override != -1) { @@ -1293,12 +1335,6 @@ static int mc_probe(struct platform_device *pdev) for (i = 0; i < ctx->codec_info_list_count; i++) codec_info_list[i].amp_num = 0; - if (mach->mach_params.subsystem_id_set) { - snd_soc_card_set_pci_ssid(card, - mach->mach_params.subsystem_vendor, - mach->mach_params.subsystem_device); - } - ret = sof_card_dai_links_create(card); if (ret < 0) return ret; diff --git a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c index 770e2194a283..9e611e3667ad 100644 --- a/sound/soc/intel/common/soc-acpi-intel-mtl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-mtl-match.c @@ -330,7 +330,7 @@ static const struct snd_soc_acpi_adr_device rt1316_3_single_adr[] = { static const struct snd_soc_acpi_adr_device rt1318_1_single_adr[] = { { - .adr = 0x000130025D131801, + .adr = 0x000130025D131801ull, .num_endpoints = 1, .endpoints = &single_endpoint, .name_prefix = "rt1318-1" diff --git a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c index 9eb4a43e3e7a..dd7993b76dee 100644 --- a/sound/soc/intel/common/soc-acpi-intel-ptl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-ptl-match.c @@ -251,7 +251,7 @@ static const struct snd_soc_acpi_link_adr ptl_rvp[] = { {} }; -static const struct snd_soc_acpi_link_adr lnl_sdw_rt713_vb_l2_rt1320_l13[] = { +static const struct snd_soc_acpi_link_adr ptl_sdw_rt713_vb_l2_rt1320_l13[] = { { .mask = BIT(2), .num_adr = ARRAY_SIZE(rt713_vb_2_adr), @@ -270,7 +270,7 @@ static const struct snd_soc_acpi_link_adr lnl_sdw_rt713_vb_l2_rt1320_l13[] = { {} }; -static const struct snd_soc_acpi_link_adr lnl_sdw_rt712_vb_l2_rt1320_l1[] = { +static const struct snd_soc_acpi_link_adr ptl_sdw_rt712_vb_l2_rt1320_l1[] = { { .mask = BIT(2), .num_adr = ARRAY_SIZE(rt712_vb_2_group1_adr), @@ -337,17 +337,17 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_ptl_sdw_machines[] = { }, { .link_mask = BIT(1) | BIT(2), - .links = lnl_sdw_rt712_vb_l2_rt1320_l1, + .links = ptl_sdw_rt712_vb_l2_rt1320_l1, .drv_name = "sof_sdw", .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb, - .sof_tplg_filename = "sof-lnl-rt712-l2-rt1320-l1.tplg" + .sof_tplg_filename = "sof-ptl-rt712-l2-rt1320-l1.tplg" }, { .link_mask = BIT(1) | BIT(2) | BIT(3), - .links = lnl_sdw_rt713_vb_l2_rt1320_l13, + .links = ptl_sdw_rt713_vb_l2_rt1320_l13, .drv_name = "sof_sdw", .machine_check = snd_soc_acpi_intel_sdca_is_device_rt712_vb, - .sof_tplg_filename = "sof-lnl-rt713-l2-rt1320-l13.tplg" + .sof_tplg_filename = "sof-ptl-rt713-l2-rt1320-l13.tplg" }, {}, }; diff --git a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c index 6f8c06413665..b77aafb0bfb6 100644 --- a/sound/soc/intel/common/soc-acpi-intel-tgl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-tgl-match.c @@ -658,25 +658,25 @@ static const struct snd_soc_acpi_endpoint cs35l56_7_fb_endpoints[] = { static const struct snd_soc_acpi_adr_device cs35l56_sdw_eight_1_4_fb_adr[] = { { - .adr = 0x00003301fa355601, + .adr = 0x00003301fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_l_fb_endpoints), .endpoints = cs35l56_l_fb_endpoints, .name_prefix = "AMP1" }, { - .adr = 0x00003201fa355601, + .adr = 0x00003201fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_2_fb_endpoints), .endpoints = cs35l56_2_fb_endpoints, .name_prefix = "AMP2" }, { - .adr = 0x00003101fa355601, + .adr = 0x00003101fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_4_fb_endpoints), .endpoints = cs35l56_4_fb_endpoints, .name_prefix = "AMP3" }, { - .adr = 0x00003001fa355601, + .adr = 0x00003001fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_6_fb_endpoints), .endpoints = cs35l56_6_fb_endpoints, .name_prefix = "AMP4" @@ -685,25 +685,25 @@ static const struct snd_soc_acpi_adr_device cs35l56_sdw_eight_1_4_fb_adr[] = { static const struct snd_soc_acpi_adr_device cs35l56_sdw_eight_5_8_fb_adr[] = { { - .adr = 0x00013701fa355601, + .adr = 0x00013701fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_r_fb_endpoints), .endpoints = cs35l56_r_fb_endpoints, .name_prefix = "AMP8" }, { - .adr = 0x00013601fa355601, + .adr = 0x00013601fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_3_fb_endpoints), .endpoints = cs35l56_3_fb_endpoints, .name_prefix = "AMP7" }, { - .adr = 0x00013501fa355601, + .adr = 0x00013501fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_5_fb_endpoints), .endpoints = cs35l56_5_fb_endpoints, .name_prefix = "AMP6" }, { - .adr = 0x00013401fa355601, + .adr = 0x00013401fa355601ull, .num_endpoints = ARRAY_SIZE(cs35l56_7_fb_endpoints), .endpoints = cs35l56_7_fb_endpoints, .name_prefix = "AMP5" diff --git a/sound/soc/renesas/rcar/core.c b/sound/soc/renesas/rcar/core.c index d3709fd0409e..f3f0c3f0bb9f 100644 --- a/sound/soc/renesas/rcar/core.c +++ b/sound/soc/renesas/rcar/core.c @@ -1770,20 +1770,6 @@ int rsnd_kctrl_accept_anytime(struct rsnd_dai_stream *io) return 1; } -int rsnd_kctrl_accept_runtime(struct rsnd_dai_stream *io) -{ - struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); - struct rsnd_priv *priv = rsnd_io_to_priv(io); - struct device *dev = rsnd_priv_to_dev(priv); - - if (!runtime) { - dev_warn(dev, "Can't update kctrl when idle\n"); - return 0; - } - - return 1; -} - struct rsnd_kctrl_cfg *rsnd_kctrl_init_m(struct rsnd_kctrl_cfg_m *cfg) { cfg->cfg.val = cfg->val; diff --git a/sound/soc/renesas/rcar/rsnd.h b/sound/soc/renesas/rcar/rsnd.h index a5f54b65313c..04c70690f7a2 100644 --- a/sound/soc/renesas/rcar/rsnd.h +++ b/sound/soc/renesas/rcar/rsnd.h @@ -742,7 +742,6 @@ struct rsnd_kctrl_cfg_s { #define rsnd_kctrl_vals(x) ((x).val) /* = (x).cfg.val[0] */ int rsnd_kctrl_accept_anytime(struct rsnd_dai_stream *io); -int rsnd_kctrl_accept_runtime(struct rsnd_dai_stream *io); struct rsnd_kctrl_cfg *rsnd_kctrl_init_m(struct rsnd_kctrl_cfg_m *cfg); struct rsnd_kctrl_cfg *rsnd_kctrl_init_s(struct rsnd_kctrl_cfg_s *cfg); int rsnd_kctrl_new(struct rsnd_mod *mod, diff --git a/sound/soc/renesas/rcar/src.c b/sound/soc/renesas/rcar/src.c index e7f86db0d94c..7d73b183bda6 100644 --- a/sound/soc/renesas/rcar/src.c +++ b/sound/soc/renesas/rcar/src.c @@ -35,6 +35,7 @@ struct rsnd_src { struct rsnd_mod *dma; struct rsnd_kctrl_cfg_s sen; /* sync convert enable */ struct rsnd_kctrl_cfg_s sync; /* sync convert */ + u32 current_sync_rate; int irq; }; @@ -100,7 +101,7 @@ static u32 rsnd_src_convert_rate(struct rsnd_dai_stream *io, if (!rsnd_src_sync_is_enabled(mod)) return rsnd_io_converted_rate(io); - convert_rate = src->sync.val; + convert_rate = src->current_sync_rate; if (!convert_rate) convert_rate = rsnd_io_converted_rate(io); @@ -201,13 +202,73 @@ static const u32 chan222222[] = { static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, struct rsnd_mod *mod) { + struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); struct rsnd_priv *priv = rsnd_mod_to_priv(mod); - struct device *dev = rsnd_priv_to_dev(priv); + struct rsnd_src *src = rsnd_mod_to_src(mod); + u32 fin, fout, new_rate; + int inc, cnt, rate; + u64 base, val; + + if (!runtime) + return; + + if (!rsnd_src_sync_is_enabled(mod)) + return; + + fin = rsnd_src_get_in_rate(priv, io); + fout = rsnd_src_get_out_rate(priv, io); + + new_rate = src->sync.val; + + if (!new_rate) + new_rate = fout; + + /* Do nothing if no diff */ + if (new_rate == src->current_sync_rate) + return; + + /* + * SRCm_IFSVR::INTIFS can change within 1% + * see + * SRCm_IFSVR::INTIFS Note + */ + inc = fout / 100; + cnt = abs(new_rate - fout) / inc; + if (fout > new_rate) + inc *= -1; + + /* + * After start running SRC, we can update only SRC_IFSVR + * for Synchronous Mode + */ + base = (u64)0x0400000 * fin; + rate = fout; + for (int i = 0; i < cnt; i++) { + val = base; + rate += inc; + do_div(val, rate); + + rsnd_mod_write(mod, SRC_IFSVR, val); + } + val = base; + do_div(val, new_rate); + + rsnd_mod_write(mod, SRC_IFSVR, val); + + /* update current_sync_rate */ + src->current_sync_rate = new_rate; +} + +static void rsnd_src_init_convert_rate(struct rsnd_dai_stream *io, + struct rsnd_mod *mod) +{ struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + struct rsnd_priv *priv = rsnd_mod_to_priv(mod); + struct device *dev = rsnd_priv_to_dev(priv); int is_play = rsnd_io_is_play(io); int use_src = 0; u32 fin, fout; - u32 ifscr, fsrate, adinr; + u32 ifscr, adinr; u32 cr, route; u32 i_busif, o_busif, tmp; const u32 *bsdsr_table; @@ -245,26 +306,15 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, adinr = rsnd_get_adinr_bit(mod, io) | chan; /* - * SRC_IFSCR / SRC_IFSVR - */ - ifscr = 0; - fsrate = 0; - if (use_src) { - u64 n; - - ifscr = 1; - n = (u64)0x0400000 * fin; - do_div(n, fout); - fsrate = n; - } - - /* + * SRC_IFSCR * SRC_SRCCR / SRC_ROUTE_MODE0 */ + ifscr = 0; cr = 0x00011110; route = 0x0; if (use_src) { route = 0x1; + ifscr = 0x1; if (rsnd_src_sync_is_enabled(mod)) { cr |= 0x1; @@ -335,7 +385,6 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, rsnd_mod_write(mod, SRC_SRCIR, 1); /* initialize */ rsnd_mod_write(mod, SRC_ADINR, adinr); rsnd_mod_write(mod, SRC_IFSCR, ifscr); - rsnd_mod_write(mod, SRC_IFSVR, fsrate); rsnd_mod_write(mod, SRC_SRCCR, cr); rsnd_mod_write(mod, SRC_BSDSR, bsdsr_table[idx]); rsnd_mod_write(mod, SRC_BSISR, bsisr_table[idx]); @@ -348,6 +397,9 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, rsnd_adg_set_src_timesel_gen2(mod, io, fin, fout); + /* update SRC_IFSVR */ + rsnd_src_set_convert_rate(io, mod); + return; convert_rate_err: @@ -467,7 +519,8 @@ static int rsnd_src_init(struct rsnd_mod *mod, int ret; /* reset sync convert_rate */ - src->sync.val = 0; + src->sync.val = + src->current_sync_rate = 0; ret = rsnd_mod_power_on(mod); if (ret < 0) @@ -475,7 +528,7 @@ static int rsnd_src_init(struct rsnd_mod *mod, rsnd_src_activation(mod); - rsnd_src_set_convert_rate(io, mod); + rsnd_src_init_convert_rate(io, mod); rsnd_src_status_clear(mod); @@ -493,7 +546,8 @@ static int rsnd_src_quit(struct rsnd_mod *mod, rsnd_mod_power_off(mod); /* reset sync convert_rate */ - src->sync.val = 0; + src->sync.val = + src->current_sync_rate = 0; return 0; } @@ -531,6 +585,22 @@ static irqreturn_t rsnd_src_interrupt(int irq, void *data) return IRQ_HANDLED; } +static int rsnd_src_kctrl_accept_runtime(struct rsnd_dai_stream *io) +{ + struct snd_pcm_runtime *runtime = rsnd_io_to_runtime(io); + + if (!runtime) { + struct rsnd_priv *priv = rsnd_io_to_priv(io); + struct device *dev = rsnd_priv_to_dev(priv); + + dev_warn(dev, "\"SRC Out Rate\" can use during running\n"); + + return 0; + } + + return 1; +} + static int rsnd_src_probe_(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) @@ -585,7 +655,7 @@ static int rsnd_src_pcm_new(struct rsnd_mod *mod, "SRC Out Rate Switch" : "SRC In Rate Switch", rsnd_kctrl_accept_anytime, - rsnd_src_set_convert_rate, + rsnd_src_init_convert_rate, &src->sen, 1); if (ret < 0) return ret; @@ -594,7 +664,7 @@ static int rsnd_src_pcm_new(struct rsnd_mod *mod, rsnd_io_is_play(io) ? "SRC Out Rate" : "SRC In Rate", - rsnd_kctrl_accept_runtime, + rsnd_src_kctrl_accept_runtime, rsnd_src_set_convert_rate, &src->sync, 192000); diff --git a/sound/soc/renesas/rcar/ssi.c b/sound/soc/renesas/rcar/ssi.c index b3d4e8ae07ef..0c6424a1fcac 100644 --- a/sound/soc/renesas/rcar/ssi.c +++ b/sound/soc/renesas/rcar/ssi.c @@ -336,7 +336,8 @@ static int rsnd_ssi_master_clk_start(struct rsnd_mod *mod, return 0; rate_err: - dev_err(dev, "unsupported clock rate\n"); + dev_err(dev, "unsupported clock rate (%d)\n", rate); + return ret; } diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c index 7f5fcaecee4b..78ab88843f86 100644 --- a/sound/soc/rockchip/rockchip_i2s_tdm.c +++ b/sound/soc/rockchip/rockchip_i2s_tdm.c @@ -451,11 +451,11 @@ static int rockchip_i2s_tdm_set_fmt(struct snd_soc_dai *cpu_dai, break; case SND_SOC_DAIFMT_DSP_A: val = I2S_TXCR_TFS_TDM_PCM; - tdm_val = TDM_SHIFT_CTRL(0); + tdm_val = TDM_SHIFT_CTRL(2); break; case SND_SOC_DAIFMT_DSP_B: val = I2S_TXCR_TFS_TDM_PCM; - tdm_val = TDM_SHIFT_CTRL(2); + tdm_val = TDM_SHIFT_CTRL(4); break; default: ret = -EINVAL; diff --git a/sound/soc/sof/amd/acp-ipc.c b/sound/soc/sof/amd/acp-ipc.c index 5f371d9263f3..22d4b807e1bb 100644 --- a/sound/soc/sof/amd/acp-ipc.c +++ b/sound/soc/sof/amd/acp-ipc.c @@ -165,8 +165,9 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context) int dsp_msg, dsp_ack; unsigned int status; - if (sdev->first_boot && sdev->fw_state != SOF_FW_BOOT_COMPLETE) { + if (unlikely(sdev->first_boot && sdev->fw_state != SOF_FW_BOOT_COMPLETE)) { acp_mailbox_read(sdev, sdev->dsp_box.offset, &status, sizeof(status)); + if ((status & SOF_IPC_PANIC_MAGIC_MASK) == SOF_IPC_PANIC_MAGIC) { snd_sof_dsp_panic(sdev, sdev->dsp_box.offset + sizeof(status), true); @@ -188,13 +189,21 @@ irqreturn_t acp_sof_ipc_irq_thread(int irq, void *context) dsp_ack = snd_sof_dsp_read(sdev, ACP_DSP_BAR, ACP_SCRATCH_REG_0 + dsp_ack_write); if (dsp_ack) { - spin_lock_irq(&sdev->ipc_lock); - /* handle immediate reply from DSP core */ - acp_dsp_ipc_get_reply(sdev); - snd_sof_ipc_reply(sdev, 0); - /* set the done bit */ - acp_dsp_ipc_dsp_done(sdev); - spin_unlock_irq(&sdev->ipc_lock); + if (likely(sdev->fw_state == SOF_FW_BOOT_COMPLETE)) { + spin_lock_irq(&sdev->ipc_lock); + + /* handle immediate reply from DSP core */ + acp_dsp_ipc_get_reply(sdev); + snd_sof_ipc_reply(sdev, 0); + /* set the done bit */ + acp_dsp_ipc_dsp_done(sdev); + + spin_unlock_irq(&sdev->ipc_lock); + } else { + dev_dbg_ratelimited(sdev->dev, "IPC reply before FW_BOOT_COMPLETE: %#x\n", + dsp_ack); + } + ipc_irq = true; } diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 33648ff8b833..9e13c96528be 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -27,6 +27,7 @@ MODULE_PARM_DESC(enable_fw_debug, "Enable Firmware debug"); static struct acp_quirk_entry quirk_valve_galileo = { .signed_fw_image = true, .skip_iram_dram_size_mod = true, + .post_fw_run_delay = true, }; const struct dmi_system_id acp_sof_quirk_table[] = { diff --git a/sound/soc/sof/amd/acp.h b/sound/soc/sof/amd/acp.h index 800594440f73..2a19d82d6200 100644 --- a/sound/soc/sof/amd/acp.h +++ b/sound/soc/sof/amd/acp.h @@ -220,6 +220,7 @@ struct sof_amd_acp_desc { struct acp_quirk_entry { bool signed_fw_image; bool skip_iram_dram_size_mod; + bool post_fw_run_delay; }; /* Common device data struct for ACP devices */ diff --git a/sound/soc/sof/amd/pci-vangogh.c b/sound/soc/sof/amd/pci-vangogh.c index 53f64d6bc91b..28f2d4050a67 100644 --- a/sound/soc/sof/amd/pci-vangogh.c +++ b/sound/soc/sof/amd/pci-vangogh.c @@ -13,11 +13,9 @@ #include <linux/module.h> #include <linux/pci.h> -#include <linux/platform_device.h> #include <sound/sof.h> #include <sound/soc-acpi.h> -#include "../ops.h" #include "../sof-pci-dev.h" #include "../../amd/mach-config.h" #include "acp.h" diff --git a/sound/soc/sof/amd/vangogh.c b/sound/soc/sof/amd/vangogh.c index 8e2672106ac6..6ed5f9aaa414 100644 --- a/sound/soc/sof/amd/vangogh.c +++ b/sound/soc/sof/amd/vangogh.c @@ -11,13 +11,10 @@ * Hardware interface for Audio DSP on Vangogh platform */ -#include <linux/platform_device.h> +#include <linux/delay.h> #include <linux/module.h> -#include "../ops.h" -#include "../sof-audio.h" #include "acp.h" -#include "acp-dsp-offset.h" #define I2S_HS_INSTANCE 0 #define I2S_BT_INSTANCE 1 @@ -136,6 +133,20 @@ static struct snd_soc_dai_driver vangogh_sof_dai[] = { }, }; +static int sof_vangogh_post_fw_run_delay(struct snd_sof_dev *sdev) +{ + /* + * Resuming from suspend in some cases my cause the DSP firmware + * to enter an unrecoverable faulty state. Delaying a bit any host + * to DSP transmission right after firmware boot completion seems + * to resolve the issue. + */ + if (!sdev->first_boot) + usleep_range(100, 150); + + return 0; +} + /* Vangogh ops */ struct snd_sof_dsp_ops sof_vangogh_ops; EXPORT_SYMBOL_NS(sof_vangogh_ops, "SND_SOC_SOF_AMD_COMMON"); @@ -157,6 +168,9 @@ int sof_vangogh_ops_init(struct snd_sof_dev *sdev) if (quirks->signed_fw_image) sof_vangogh_ops.load_firmware = acp_sof_load_signed_firmware; + + if (quirks->post_fw_run_delay) + sof_vangogh_ops.post_fw_run = sof_vangogh_post_fw_run_delay; } return 0; diff --git a/sound/soc/sof/intel/hda-codec.c b/sound/soc/sof/intel/hda-codec.c index 568f3dfe822f..2f9925830d1d 100644 --- a/sound/soc/sof/intel/hda-codec.c +++ b/sound/soc/sof/intel/hda-codec.c @@ -454,6 +454,7 @@ int hda_codec_i915_exit(struct snd_sof_dev *sdev) } EXPORT_SYMBOL_NS_GPL(hda_codec_i915_exit, "SND_SOC_SOF_HDA_AUDIO_CODEC_I915"); +MODULE_SOFTDEP("pre: snd-hda-codec-hdmi"); #endif MODULE_LICENSE("Dual BSD/GPL"); diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index be689f6e10c8..a1ccd95da8bb 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -1312,22 +1312,8 @@ struct snd_soc_acpi_mach *hda_machine_select(struct snd_sof_dev *sdev) /* report to machine driver if any DMICs are found */ mach->mach_params.dmic_num = check_dmic_num(sdev); - if (sdw_mach_found) { - /* - * DMICs use up to 4 pins and are typically pin-muxed with SoundWire - * link 2 and 3, or link 1 and 2, thus we only try to enable dmics - * if all conditions are true: - * a) 2 or fewer links are used by SoundWire - * b) the NHLT table reports the presence of microphones - */ - if (hweight_long(mach->link_mask) <= 2) - dmic_fixup = true; - else - mach->mach_params.dmic_num = 0; - } else { - if (mach->tplg_quirk_mask & SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER) - dmic_fixup = true; - } + if (sdw_mach_found || mach->tplg_quirk_mask & SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER) + dmic_fixup = true; if (tplg_fixup && dmic_fixup && diff --git a/sound/soc/sof/intel/pci-ptl.c b/sound/soc/sof/intel/pci-ptl.c index 0aacdfac9fb4..c4fb6a2441b7 100644 --- a/sound/soc/sof/intel/pci-ptl.c +++ b/sound/soc/sof/intel/pci-ptl.c @@ -50,6 +50,7 @@ static const struct sof_dev_desc ptl_desc = { /* PCI IDs */ static const struct pci_device_id sof_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, HDA_PTL, &ptl_desc) }, /* PTL */ + { PCI_DEVICE_DATA(INTEL, HDA_PTL_H, &ptl_desc) }, /* PTL-H */ { 0, } }; MODULE_DEVICE_TABLE(pci, sof_pci_ids); diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c index c04c62478827..6d5cda813e48 100644 --- a/sound/soc/sof/ipc4-topology.c +++ b/sound/soc/sof/ipc4-topology.c @@ -765,10 +765,16 @@ static int sof_ipc4_widget_setup_comp_dai(struct snd_sof_widget *swidget) } list_for_each_entry(w, &sdev->widget_list, list) { - if (w->widget->sname && + struct snd_sof_dai *alh_dai; + + if (!WIDGET_IS_DAI(w->id) || !w->widget->sname || strcmp(w->widget->sname, swidget->widget->sname)) continue; + alh_dai = w->private; + if (alh_dai->type != SOF_DAI_INTEL_ALH) + continue; + blob->alh_cfg.device_count++; } @@ -2061,11 +2067,13 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget, list_for_each_entry(w, &sdev->widget_list, list) { u32 node_type; - if (w->widget->sname && + if (!WIDGET_IS_DAI(w->id) || !w->widget->sname || strcmp(w->widget->sname, swidget->widget->sname)) continue; dai = w->private; + if (dai->type != SOF_DAI_INTEL_ALH) + continue; alh_copier = (struct sof_ipc4_copier *)dai->private; alh_data = &alh_copier->data; node_type = SOF_IPC4_GET_NODE_TYPE(alh_data->gtw_cfg.node_id); diff --git a/sound/soc/sof/pcm.c b/sound/soc/sof/pcm.c index 35a7462d8b69..c5c6353f18ce 100644 --- a/sound/soc/sof/pcm.c +++ b/sound/soc/sof/pcm.c @@ -511,6 +511,8 @@ static int sof_pcm_close(struct snd_soc_component *component, */ } + spcm->stream[substream->stream].substream = NULL; + return 0; } diff --git a/sound/soc/sof/stream-ipc.c b/sound/soc/sof/stream-ipc.c index 794c7bbccbaf..8262443ac89a 100644 --- a/sound/soc/sof/stream-ipc.c +++ b/sound/soc/sof/stream-ipc.c @@ -43,7 +43,7 @@ int sof_ipc_msg_data(struct snd_sof_dev *sdev, return -ESTRPIPE; posn_offset = stream->posn_offset; - } else { + } else if (sps->cstream) { struct sof_compr_stream *sstream = sps->cstream->runtime->private_data; @@ -51,6 +51,10 @@ int sof_ipc_msg_data(struct snd_sof_dev *sdev, return -ESTRPIPE; posn_offset = sstream->posn_offset; + + } else { + dev_err(sdev->dev, "%s: No stream opened\n", __func__); + return -EINVAL; } snd_sof_dsp_mailbox_read(sdev, posn_offset, p, sz); diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 737dd00e97b1..779d97d31f17 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1145,7 +1145,7 @@ static int snd_usbmidi_output_close(struct snd_rawmidi_substream *substream) { struct usbmidi_out_port *port = substream->runtime->private_data; - cancel_work_sync(&port->ep->work); + flush_work(&port->ep->work); return substream_open(substream, 0, 0); } diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index a97efb7b131e..09210fb4ac60 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1868,6 +1868,7 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */ subs->stream_offset_adj = 2; break; + case USB_ID(0x2b73, 0x000a): /* Pioneer DJM-900NXS2 */ case USB_ID(0x2b73, 0x0013): /* Pioneer DJM-450 */ pioneer_djm_set_format_quirk(subs, 0x0082); break; diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 100ad3dc091a..3feac0482fe9 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -196,22 +196,22 @@ static int get_family_id(int sd) static void print_delayacct(struct taskstats *t) { - printf("\n\nCPU %15s%15s%15s%15s%15s%15s\n" - " %15llu%15llu%15llu%15llu%15.3fms%13.6fms\n" - "IO %15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "SWAP %15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "RECLAIM %12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "THRASHING%12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "COMPACT %12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "WPCOPY %12s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n" - "IRQ %15s%15s%15s%15s\n" - " %15llu%15llu%15.3fms%13.6fms\n", + printf("\n\nCPU %15s%15s%15s%15s%15s%15s%15s\n" + " %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "IO %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "SWAP %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "RECLAIM %12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "THRASHING%12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "COMPACT %12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "WPCOPY %12s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n" + "IRQ %15s%15s%15s%15s%15s\n" + " %15llu%15llu%15.3fms%13.6fms%13.6fms\n", "count", "real total", "virtual total", "delay total", "delay average", "delay max", "delay min", (unsigned long long)t->cpu_count, diff --git a/tools/arch/arm64/tools/Makefile b/tools/arch/arm64/tools/Makefile index 7b42feedf647..de4f1b66ef01 100644 --- a/tools/arch/arm64/tools/Makefile +++ b/tools/arch/arm64/tools/Makefile @@ -13,12 +13,6 @@ AWK ?= awk MKDIR ?= mkdir RM ?= rm -ifeq ($(V),1) -Q = -else -Q = @ -endif - arm64_tools_dir = $(top_srcdir)/arch/arm64/tools arm64_sysreg_tbl = $(arm64_tools_dir)/sysreg arm64_gen_sysreg = $(arm64_tools_dir)/gen-sysreg.awk diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 243b79f2b451..062bbd6cd048 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -27,12 +27,6 @@ srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -ifeq ($(V),1) - Q = -else - Q = @ -endif - FEATURE_USER = .bpf FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled FEATURE_DISPLAY = libbfd diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index 4315652678b9..bf843f328812 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -5,12 +5,6 @@ INSTALL ?= install RM ?= rm -f RMDIR ?= rmdir --ignore-fail-on-non-empty -ifeq ($(V),1) - Q = -else - Q = @ -endif - prefix ?= /usr/local mandir ?= $(prefix)/man man8dir = $(mandir)/man8 diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index dd9f3ec84201..6ea4823b770c 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -7,12 +7,6 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif -ifeq ($(V),1) - Q = -else - Q = @ -endif - BPF_DIR = $(srctree)/tools/lib/bpf ifneq ($(OUTPUT),) diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 4b8079f294f6..afbddea3a39c 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -5,10 +5,8 @@ include ../../scripts/Makefile.arch srctree := $(abspath $(CURDIR)/../../../) ifeq ($(V),1) - Q = msg = else - Q = @ ifeq ($(silent),1) msg = else diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index c4f1f1735af6..e49203ebd48c 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -26,10 +26,7 @@ VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \ $(wildcard $(VMLINUX_BTF_PATHS)))) -ifeq ($(V),1) -Q = -else -Q = @ +ifneq ($(V),1) MAKEFLAGS += --no-print-directory submake_extras := feature_display=0 endif diff --git a/tools/build/Makefile b/tools/build/Makefile index 18ad131f6ea7..63ef21878761 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -17,13 +17,7 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld) export HOSTCC HOSTLD HOSTAR -ifeq ($(V),1) - Q = -else - Q = @ -endif - -export Q srctree CC LD +export srctree CC LD MAKEFLAGS := --no-print-directory build := -f $(srctree)/tools/build/Makefile.build dir=. obj diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 857a5f7b413d..168140f8e646 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -53,13 +53,6 @@ include $(srctree)/tools/scripts/Makefile.include # copy a bit from Linux kbuild -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - INCLUDES = -I$(or $(OUTPUT),.) \ -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \ -I$(srctree)/tools/arch/$(SRCARCH)/include @@ -96,12 +89,6 @@ override CFLAGS += $(CLANG_CROSS_FLAGS) # flags specific for shared library SHLIB_FLAGS := -DSHARED -fPIC -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Disable command line variables (CFLAGS) override from top # level Makefile (perf), otherwise build Makefile will get # the same command line setup. diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index 3a9b2140aa04..e9a7ac2c062e 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - TEST_ARGS := $(if $(V),-v) # Set compile option CFLAGS diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile index 8890fd57b110..a1f5e388644d 100644 --- a/tools/lib/thermal/Makefile +++ b/tools/lib/thermal/Makefile @@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Set compile option CFLAGS ifdef EXTRA_CFLAGS CFLAGS := $(EXTRA_CFLAGS) diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c index bcac7ebfb51f..d7e5e8902af8 100644 --- a/tools/mm/page-types.c +++ b/tools/mm/page-types.c @@ -24,8 +24,8 @@ #include <signal.h> #include <inttypes.h> #include <sys/types.h> -#include <sys/errno.h> -#include <sys/fcntl.h> +#include <errno.h> +#include <fcntl.h> #include <sys/mount.h> #include <sys/statfs.h> #include <sys/mman.h> diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index f56e27727534..7a65948892e5 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -46,12 +46,6 @@ HOST_OVERRIDES := CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" AWK = awk MKDIR = mkdir -ifeq ($(V),1) - Q = -else - Q = @ -endif - BUILD_ORC := n ifeq ($(SRCARCH),x86) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 753dbc4f8198..ce973d9d8e6d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -227,6 +227,7 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || + strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || (strstr(func->name, "_4core5slice5index24slice_") && str_ends_with(func->name, "_fail")); @@ -1975,6 +1976,14 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, reloc_addend(reloc) == pfunc->offset) break; + /* + * Clang sometimes leaves dangling unused jump table entries + * which point to the end of the function. Ignore them. + */ + if (reloc->sym->sec == pfunc->sec && + reloc_addend(reloc) == pfunc->offset + pfunc->len) + goto next; + dest_insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); if (!dest_insn) break; @@ -1992,6 +2001,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn, alt->insn = dest_insn; alt->next = insn->alts; insn->alts = alt; +next: prev_offset = reloc_offset(reloc); } @@ -2264,7 +2274,7 @@ static int read_annotate(struct objtool_file *file, if (sec->sh.sh_entsize != 8) { static bool warned = false; - if (!warned) { + if (!warned && opts.verbose) { WARN("%s: dodgy linker, sh_entsize != 8", sec->name); warned = true; } @@ -2462,13 +2472,14 @@ static void mark_rodata(struct objtool_file *file) * * - .rodata: can contain GCC switch tables * - .rodata.<func>: same, if -fdata-sections is being used - * - .rodata..c_jump_table: contains C annotated jump tables + * - .data.rel.ro.c_jump_table: contains C annotated jump tables * * .rodata.str1.* sections are ignored; they don't contain jump tables. */ for_each_sec(file, sec) { - if (!strncmp(sec->name, ".rodata", 7) && - !strstr(sec->name, ".str1.")) { + if ((!strncmp(sec->name, ".rodata", 7) && + !strstr(sec->name, ".str1.")) || + !strncmp(sec->name, ".data.rel.ro", 12)) { sec->rodata = true; found = true; } diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h index e7ee7ffccefd..e049679bb17b 100644 --- a/tools/objtool/include/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -10,7 +10,7 @@ #include <objtool/check.h> #include <objtool/elf.h> -#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" +#define C_JUMP_TABLE_SECTION ".data.rel.ro.c_jump_table" struct special_alt { struct list_head list; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index b2174894f9f7..6bb7edda3094 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -19,7 +19,7 @@ NORETURN(__x64_sys_exit_group) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) -NORETURN(bch2_trans_unlocked_error) +NORETURN(bch2_trans_unlocked_or_in_restart_error) NORETURN(cpu_bringup_and_idle) NORETURN(cpu_startup_entry) NORETURN(do_exit) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 55d6ce9ea52f..05c083bb1122 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -161,47 +161,6 @@ export VPATH SOURCE := $(shell ln -sf $(srctree)/tools/perf $(OUTPUT)/source) endif -# Beautify output -# --------------------------------------------------------------------------- -# -# Most of build commands in Kbuild start with "cmd_". You can optionally define -# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from -# that command is printed by default. -# -# e.g.) -# quiet_cmd_depmod = DEPMOD $(MODLIB) -# cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE) -# -# A simple variant is to prefix commands with $(Q) - that's useful -# for commands that shall be hidden in non-verbose mode. -# -# $(Q)$(MAKE) $(build)=scripts/basic -# -# To put more focus on warnings, be less verbose as default -# Use 'make V=1' to see the full commands - -ifeq ($(V),1) - quiet = - Q = -else - quiet=quiet_ - Q=@ -endif - -# If the user is running make -s (silent mode), suppress echoing of commands -# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS. -ifeq ($(filter 3.%,$(MAKE_VERSION)),) -short-opts := $(firstword -$(MAKEFLAGS)) -else -short-opts := $(filter-out --%,$(MAKEFLAGS)) -endif - -ifneq ($(findstring s,$(short-opts)),) - quiet=silent_ -endif - -export quiet Q - # Do not use make's built-in rules # (this improves performance and avoids hard-to-debug behaviour); MAKEFLAGS += -r diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index f3e15e9efa76..7849405614b1 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -270,8 +270,16 @@ void bpf_obj_drop_impl(void *kptr, void *meta) __ksym; #define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL)) #define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL) -void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; -void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym; +int bpf_list_push_front_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; +#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0) + +int bpf_list_push_back_impl(struct bpf_list_head *head, + struct bpf_list_node *node, + void *meta, __u64 off) __ksym; +#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0) + struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym; struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym; struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root, @@ -404,6 +412,17 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask) return (const struct cpumask *)mask; } +/* + * Return true if task @p cannot migrate to a different CPU, false + * otherwise. + */ +static inline bool is_migration_disabled(const struct task_struct *p) +{ + if (bpf_core_field_exists(p->migration_disabled)) + return p->migration_disabled; + return false; +} + /* rcu */ void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; @@ -421,7 +440,7 @@ void bpf_rcu_read_unlock(void) __ksym; */ static inline s64 time_delta(u64 after, u64 before) { - return (s64)(after - before) > 0 ? : 0; + return (s64)(after - before) > 0 ? (s64)(after - before) : 0; } /** diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 0aa4005017c7..45f4abef7064 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -136,6 +136,33 @@ else NO_SUBDIR = : endif +# Beautify output +# --------------------------------------------------------------------------- +# +# Most of build commands in Kbuild start with "cmd_". You can optionally define +# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from +# that command is printed by default. +# +# e.g.) +# quiet_cmd_depmod = DEPMOD $(MODLIB) +# cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE) +# +# A simple variant is to prefix commands with $(Q) - that's useful +# for commands that shall be hidden in non-verbose mode. +# +# $(Q)$(MAKE) $(build)=scripts/basic +# +# To put more focus on warnings, be less verbose as default +# Use 'make V=1' to see the full commands + +ifeq ($(V),1) + quiet = + Q = +else + quiet = quiet_ + Q = @ +endif + # If the user is running make -s (silent mode), suppress echoing of commands # make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS. ifeq ($(filter 3.%,$(MAKE_VERSION)),) @@ -146,8 +173,11 @@ endif ifneq ($(findstring s,$(short-opts)),) silent=1 + quiet=silent_ endif +export quiet Q + # # Define a callable command for descending to a new directory # diff --git a/tools/sound/dapm-graph b/tools/sound/dapm-graph index f14bdfedee8f..b6196ee5065a 100755 --- a/tools/sound/dapm-graph +++ b/tools/sound/dapm-graph @@ -10,7 +10,7 @@ set -eu STYLE_COMPONENT_ON="color=dodgerblue;style=bold" STYLE_COMPONENT_OFF="color=gray40;style=filled;fillcolor=gray90" -STYLE_NODE_ON="shape=box,style=bold,color=green4" +STYLE_NODE_ON="shape=box,style=bold,color=green4,fillcolor=white" STYLE_NODE_OFF="shape=box,style=filled,color=gray30,fillcolor=gray95" # Print usage and exit diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs index eb6a4fea8c79..f7f9e7088bb3 100644 --- a/tools/testing/selftests/bpf/Makefile.docs +++ b/tools/testing/selftests/bpf/Makefile.docs @@ -7,12 +7,6 @@ INSTALL ?= install RM ?= rm -f RMDIR ?= rmdir --ignore-fail-on-non-empty -ifeq ($(V),1) - Q = -else - Q = @ -endif - prefix ?= /usr/local mandir ?= $(prefix)/man man2dir = $(mandir)/man2 diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c index 66191ae9863c..79c3ccadb962 100644 --- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c +++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c @@ -120,11 +120,12 @@ static void validate_fetch_results(int outer_map_fd, static void fetch_and_validate(int outer_map_fd, struct bpf_map_batch_opts *opts, - __u32 batch_size, bool delete_entries) + __u32 batch_size, bool delete_entries, + bool has_holes) { - __u32 *fetched_keys, *fetched_values, total_fetched = 0; - __u32 batch_key = 0, fetch_count, step_size; - int err, max_entries = OUTER_MAP_ENTRIES; + int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes; + __u32 *fetched_keys, *fetched_values, total_fetched = 0, i; + __u32 batch_key = 0, fetch_count, step_size = batch_size; __u32 value_size = sizeof(__u32); /* Total entries needs to be fetched */ @@ -134,9 +135,8 @@ static void fetch_and_validate(int outer_map_fd, "Memory allocation failed for fetched_keys or fetched_values", "error=%s\n", strerror(errno)); - for (step_size = batch_size; - step_size <= max_entries; - step_size += batch_size) { + /* hash map may not always return full batch */ + for (i = 0; i < OUTER_MAP_ENTRIES; i++) { fetch_count = step_size; err = delete_entries ? bpf_map_lookup_and_delete_batch(outer_map_fd, @@ -155,6 +155,7 @@ static void fetch_and_validate(int outer_map_fd, if (err && errno == ENOSPC) { /* Fetch again with higher batch size */ total_fetched = 0; + step_size += batch_size; continue; } @@ -184,18 +185,19 @@ static void fetch_and_validate(int outer_map_fd, } static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, - enum bpf_map_type inner_map_type) + enum bpf_map_type inner_map_type, + bool has_holes) { + __u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes; __u32 *outer_map_keys, *inner_map_fds; - __u32 max_entries = OUTER_MAP_ENTRIES; LIBBPF_OPTS(bpf_map_batch_opts, opts); __u32 value_size = sizeof(__u32); int batch_size[2] = {5, 10}; __u32 map_index, op_index; int outer_map_fd, ret; - outer_map_keys = calloc(max_entries, value_size); - inner_map_fds = calloc(max_entries, value_size); + outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size); + inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size); CHECK((!outer_map_keys || !inner_map_fds), "Memory allocation failed for outer_map_keys or inner_map_fds", "error=%s\n", strerror(errno)); @@ -209,6 +211,24 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, ((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) ? 9 : 1000) - map_index; + /* This condition is only meaningful for array of maps. + * + * max_entries == OUTER_MAP_ENTRIES - 1 if it is true. Say + * max_entries is short for n, then outer_map_keys looks like: + * + * [n, n-1, ... 2, 1] + * + * We change it to + * + * [n, n-1, ... 2, 0] + * + * So it will leave key 1 as a hole. It will serve to test the + * correctness when batch on an array: a "non-exist" key might be + * actually allocated and returned from key iteration. + */ + if (has_holes) + outer_map_keys[max_entries - 1]--; + /* batch operation - map_update */ ret = bpf_map_update_batch(outer_map_fd, outer_map_keys, inner_map_fds, &max_entries, &opts); @@ -219,15 +239,17 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, /* batch operation - map_lookup */ for (op_index = 0; op_index < 2; ++op_index) fetch_and_validate(outer_map_fd, &opts, - batch_size[op_index], false); + batch_size[op_index], false, + has_holes); /* batch operation - map_lookup_delete */ if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS) fetch_and_validate(outer_map_fd, &opts, - max_entries, true /*delete*/); + max_entries, true /*delete*/, + has_holes); /* close all map fds */ - for (map_index = 0; map_index < max_entries; map_index++) + for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) close(inner_map_fds[map_index]); close(outer_map_fd); @@ -237,16 +259,20 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type, void test_map_in_map_batch_ops_array(void) { - _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false); printf("%s:PASS with inner ARRAY map\n", __func__); - _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false); printf("%s:PASS with inner HASH map\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true); + printf("%s:PASS with inner ARRAY map with holes\n", __func__); + _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true); + printf("%s:PASS with inner HASH map with holes\n", __func__); } void test_map_in_map_batch_ops_hash(void) { - _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY); + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false); printf("%s:PASS with inner ARRAY map\n", __func__); - _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH); + _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false); printf("%s:PASS with inner HASH map\n", __func__); } diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c index 3729fbfd3084..80b153d3ddec 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c @@ -542,8 +542,12 @@ static void detach_program(struct bpf_flow *skel, int prog_fd) static int set_port_drop(int pf, bool multi_port) { + char dst_port[16]; + + snprintf(dst_port, sizeof(dst_port), "%d", CFG_PORT_INNER); + SYS(fail, "tc qdisc add dev lo ingress"); - SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s", + SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s %s %s", "dev lo", "parent FFFF:", "protocol", pf == PF_INET6 ? "ipv6" : "ip", @@ -551,6 +555,7 @@ static int set_port_drop(int pf, bool multi_port) "flower", "ip_proto udp", "src_port", multi_port ? "8-10" : "9", + "dst_port", dst_port, "action drop"); return 0; diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 884ad87783d5..1e3e4392dcca 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -111,31 +111,35 @@ out: static void test_sockmap_vsock_delete_on_close(void) { - int err, c, p, map; - const int zero = 0; - - err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); - if (!ASSERT_OK(err, "create_pair(AF_VSOCK)")) - return; + int map, c, p, err, zero = 0; map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); - if (!ASSERT_GE(map, 0, "bpf_map_create")) { - close(c); - goto out; - } + if (!ASSERT_OK_FD(map, "bpf_map_create")) + return; - err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST); - close(c); - if (!ASSERT_OK(err, "bpf_map_update")) - goto out; + err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); + if (!ASSERT_OK(err, "create_pair")) + goto close_map; + + if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST)) + goto close_socks; + + xclose(c); + xclose(p); + + err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p); + if (!ASSERT_OK(err, "create_pair")) + goto close_map; - err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST); ASSERT_OK(err, "after close(), bpf_map_update"); -out: - close(p); - close(map); +close_socks: + xclose(c); + xclose(p); +close_map: + xclose(map); } static void test_skmsg_helpers(enum bpf_map_type map_type) @@ -522,8 +526,8 @@ static void test_sockmap_skb_verdict_shutdown(void) if (!ASSERT_EQ(err, 1, "epoll_wait(fd)")) goto out_close; - n = recv(c1, &b, 1, SOCK_NONBLOCK); - ASSERT_EQ(n, 0, "recv_timeout(fin)"); + n = recv(c1, &b, 1, MSG_DONTWAIT); + ASSERT_EQ(n, 0, "recv(fin)"); out_close: close(c1); close(p1); @@ -531,57 +535,6 @@ out: test_sockmap_pass_prog__destroy(skel); } -static void test_sockmap_stream_pass(void) -{ - int zero = 0, sent, recvd; - int verdict, parser; - int err, map; - int c = -1, p = -1; - struct test_sockmap_pass_prog *pass = NULL; - char snd[256] = "0123456789"; - char rcv[256] = "0"; - - pass = test_sockmap_pass_prog__open_and_load(); - verdict = bpf_program__fd(pass->progs.prog_skb_verdict); - parser = bpf_program__fd(pass->progs.prog_skb_parser); - map = bpf_map__fd(pass->maps.sock_map_rx); - - err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0); - if (!ASSERT_OK(err, "bpf_prog_attach stream parser")) - goto out; - - err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); - if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) - goto out; - - err = create_pair(AF_INET, SOCK_STREAM, &c, &p); - if (err) - goto out; - - /* sk_data_ready of 'p' will be replaced by strparser handler */ - err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); - if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) - goto out_close; - - /* - * as 'prog_skb_parser' return the original skb len and - * 'prog_skb_verdict' return SK_PASS, the kernel will just - * pass it through to original socket 'p' - */ - sent = xsend(c, snd, sizeof(snd), 0); - ASSERT_EQ(sent, sizeof(snd), "xsend(c)"); - - recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK, - IO_TIMEOUT_SEC); - ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)"); - -out_close: - close(c); - close(p); - -out: - test_sockmap_pass_prog__destroy(pass); -} static void test_sockmap_skb_verdict_fionread(bool pass_prog) { @@ -628,7 +581,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog) ASSERT_EQ(avail, expected, "ioctl(FIONREAD)"); /* On DROP test there will be no data to read */ if (pass_prog) { - recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC); + recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC); ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)"); } @@ -1061,6 +1014,34 @@ destroy: test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_vsock_unconnected(void) +{ + struct sockaddr_storage addr; + int map, s, zero = 0; + socklen_t alen; + + map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int), + sizeof(int), 1, NULL); + if (!ASSERT_OK_FD(map, "bpf_map_create")) + return; + + s = xsocket(AF_VSOCK, SOCK_STREAM, 0); + if (s < 0) + goto close_map; + + /* Fail connect(), but trigger transport assignment. */ + init_addr_loopback(AF_VSOCK, &addr, &alen); + if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect")) + goto close_sock; + + ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update"); + +close_sock: + xclose(s); +close_map: + xclose(map); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -1101,8 +1082,6 @@ void test_sockmap_basic(void) test_sockmap_progs_query(BPF_SK_SKB_VERDICT); if (test__start_subtest("sockmap skb_verdict shutdown")) test_sockmap_skb_verdict_shutdown(); - if (test__start_subtest("sockmap stream parser and verdict pass")) - test_sockmap_stream_pass(); if (test__start_subtest("sockmap skb_verdict fionread")) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) @@ -1127,4 +1106,6 @@ void test_sockmap_basic(void) test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH); if (test__start_subtest("sockmap skb_verdict vsock poll")) test_sockmap_skb_verdict_vsock_poll(); + if (test__start_subtest("sockmap vsock unconnected")) + test_sockmap_vsock_unconnected(); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c new file mode 100644 index 000000000000..621b3b71888e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <netinet/tcp.h> +#include <test_progs.h> +#include "sockmap_helpers.h" +#include "test_skmsg_load_helpers.skel.h" +#include "test_sockmap_strp.skel.h" + +#define STRP_PKT_HEAD_LEN 4 +#define STRP_PKT_BODY_LEN 6 +#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN) + +static const char packet[STRP_PKT_FULL_LEN] = "head+body\0"; +static const int test_packet_num = 100; + +/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready + * with sk held if an skb exists in sk_receive_queue. Then for the + * data_ready implementation of strparser, it will delay the read + * operation if sk is held and EAGAIN is returned. + */ +static int sockmap_strp_consume_pre_data(int p) +{ + int recvd; + bool retried = false; + char rcv[10]; + +retry: + errno = 0; + recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1); + if (recvd < 0 && errno == EAGAIN && retried == false) { + /* On the first call, EAGAIN will certainly be returned. + * A 1-second wait is enough for the workqueue to finish. + */ + sleep(1); + retried = true; + goto retry; + } + + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") || + !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN), + "data mismatch")) + return -1; + return 0; +} + +static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass, + bool need_parser) +{ + struct test_sockmap_strp *strp = NULL; + int verdict, parser; + int err; + + strp = test_sockmap_strp__open_and_load(); + *out_map = bpf_map__fd(strp->maps.sock_map); + + if (need_parser) + parser = bpf_program__fd(strp->progs.prog_skb_parser_partial); + else + parser = bpf_program__fd(strp->progs.prog_skb_parser); + + if (pass) + verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass); + else + verdict = bpf_program__fd(strp->progs.prog_skb_verdict); + + err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream parser")) + goto err; + + err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto err; + + return strp; +err: + test_sockmap_strp__destroy(strp); + return NULL; +} + +/* Dispatch packets to different socket by packet size: + * + * ------ ------ + * | pkt4 || pkt1 |... > remote socket + * ------ ------ / ------ ------ + * | pkt8 | pkt7 |... + * ------ ------ \ ------ ------ + * | pkt3 || pkt2 |... > local socket + * ------ ------ + */ +static void test_sockmap_strp_dispatch_pkt(int family, int sotype) +{ + int i, j, zero = 0, one = 1, recvd; + int err, map; + int c0 = -1, p0 = -1, c1 = -1, p1 = -1; + struct test_sockmap_strp *strp = NULL; + int test_cnt = 6; + char rcv[10]; + struct { + char data[7]; + int data_len; + int send_cnt; + int *receiver; + } send_dir[2] = { + /* data expected to deliver to local */ + {"llllll", 6, 0, &p0}, + /* data expected to deliver to remote */ + {"rrrrr", 5, 0, &c1} + }; + + strp = sockmap_strp_init(&map, false, false); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1); + if (!ASSERT_OK(err, "create_socket_pairs()")) + goto out; + + err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p0)")) + goto out_close; + + err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p1)")) + goto out_close; + + err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero)); + if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)")) + goto out_close; + + /* deliver data with data size greater than 5 to local */ + strp->data->verdict_max_size = 5; + + for (i = 0; i < test_cnt; i++) { + int d = i % 2; + + xsend(c0, send_dir[d].data, send_dir[d].data_len, 0); + send_dir[d].send_cnt++; + } + + for (i = 0; i < 2; i++) { + for (j = 0; j < send_dir[i].send_cnt; j++) { + int expected = send_dir[i].data_len; + + recvd = recv_timeout(*send_dir[i].receiver, rcv, + expected, MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, expected, "recv_timeout()")) + goto out_close; + if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd), + "data mismatch")) + goto out_close; + } + } +out_close: + close(c0); + close(c1); + close(p0); + close(p1); +out: + test_sockmap_strp__destroy(strp); +} + +/* We have multiple packets in one skb + * ------------ ------------ ------------ + * | packet1 | packet2 | ... + * ------------ ------------ ------------ + */ +static void test_sockmap_strp_multiple_pkt(int family, int sotype) +{ + int i, zero = 0; + int sent, recvd, total; + int err, map; + int c = -1, p = -1; + struct test_sockmap_strp *strp = NULL; + char *snd = NULL, *rcv = NULL; + + strp = sockmap_strp_init(&map, true, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_pair(family, sotype, &c, &p); + if (err) + goto out; + + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)")) + goto out_close; + + /* construct multiple packets in one buffer */ + total = test_packet_num * STRP_PKT_FULL_LEN; + snd = malloc(total); + rcv = malloc(total + 1); + if (!ASSERT_TRUE(snd, "malloc(snd)") || + !ASSERT_TRUE(rcv, "malloc(rcv)")) + goto out_close; + + for (i = 0; i < test_packet_num; i++) { + memcpy(snd + i * STRP_PKT_FULL_LEN, + packet, STRP_PKT_FULL_LEN); + } + + sent = xsend(c, snd, total, 0); + if (!ASSERT_EQ(sent, total, "xsend(c)")) + goto out_close; + + /* try to recv one more byte to avoid truncation check */ + recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, total, "recv(rcv)")) + goto out_close; + + /* we sent TCP segment with multiple encapsulation + * then check whether packets are handled correctly + */ + if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch")) + goto out_close; + +out_close: + close(c); + close(p); + if (snd) + free(snd); + if (rcv) + free(rcv); +out: + test_sockmap_strp__destroy(strp); +} + +/* Test strparser with partial read */ +static void test_sockmap_strp_partial_read(int family, int sotype) +{ + int zero = 0, recvd, off; + int err, map; + int c = -1, p = -1; + struct test_sockmap_strp *strp = NULL; + char rcv[STRP_PKT_FULL_LEN + 1] = "0"; + + strp = sockmap_strp_init(&map, true, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_pair(family, sotype, &c, &p); + if (err) + goto out; + + /* sk_data_ready of 'p' will be replaced by strparser handler */ + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)")) + goto out_close; + + /* 1.1 send partial head, 1 byte header left */ + off = STRP_PKT_HEAD_LEN - 1; + xsend(c, packet, off, 0); + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data")) + goto out_close; + + /* 1.2 send remaining head and body */ + xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0); + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data")) + goto out_close; + + /* 2.1 send partial head, 1 byte header left */ + off = STRP_PKT_HEAD_LEN - 1; + xsend(c, packet, off, 0); + + /* 2.2 send remaining head and partial body, 1 byte body left */ + xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0); + off = STRP_PKT_FULL_LEN - 1; + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data")) + goto out_close; + + /* 2.3 send remaining body */ + xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0); + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data")) + goto out_close; + +out_close: + close(c); + close(p); + +out: + test_sockmap_strp__destroy(strp); +} + +/* Test simple socket read/write with strparser + FIONREAD */ +static void test_sockmap_strp_pass(int family, int sotype, bool fionread) +{ + int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail; + int err, map; + int c = -1, p = -1; + int test_cnt = 10, i; + struct test_sockmap_strp *strp = NULL; + char rcv[STRP_PKT_FULL_LEN + 1] = "0"; + + strp = sockmap_strp_init(&map, true, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + err = create_pair(family, sotype, &c, &p); + if (err) + goto out; + + /* inject some data before bpf process, it should be read + * correctly because we check sk_receive_queue in + * tcp_bpf_recvmsg_parser(). + */ + sent = xsend(c, packet, pkt_size, 0); + if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)")) + goto out_close; + + /* sk_data_ready of 'p' will be replaced by strparser handler */ + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) + goto out_close; + + /* consume previous data we injected */ + if (sockmap_strp_consume_pre_data(p)) + goto out_close; + + /* Previously, we encountered issues such as deadlocks and + * sequence errors that resulted in the inability to read + * continuously. Therefore, we perform multiple iterations + * of testing here. + */ + for (i = 0; i < test_cnt; i++) { + sent = xsend(c, packet, pkt_size, 0); + if (!ASSERT_EQ(sent, pkt_size, "xsend(c)")) + goto out_close; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") || + !ASSERT_OK(memcmp(packet, rcv, pkt_size), + "memcmp, data mismatch")) + goto out_close; + } + + if (fionread) { + sent = xsend(c, packet, pkt_size, 0); + if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)")) + goto out_close; + + err = ioctl(p, FIONREAD, &avail); + if (!ASSERT_OK(err, "ioctl(FIONREAD) error") || + !ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)")) + goto out_close; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") || + !ASSERT_OK(memcmp(packet, rcv, pkt_size), + "second memcmp, data mismatch")) + goto out_close; + } + +out_close: + close(c); + close(p); + +out: + test_sockmap_strp__destroy(strp); +} + +/* Test strparser with verdict mode */ +static void test_sockmap_strp_verdict(int family, int sotype) +{ + int zero = 0, one = 1, sent, recvd, off; + int err, map; + int c0 = -1, p0 = -1, c1 = -1, p1 = -1; + struct test_sockmap_strp *strp = NULL; + char rcv[STRP_PKT_FULL_LEN + 1] = "0"; + + strp = sockmap_strp_init(&map, false, true); + if (!ASSERT_TRUE(strp, "sockmap_strp_init")) + return; + + /* We simulate a reverse proxy server. + * When p0 receives data from c0, we forward it to c1. + * From c1's perspective, it will consider this data + * as being sent by p1. + */ + err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1); + if (!ASSERT_OK(err, "create_socket_pairs()")) + goto out; + + err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p0)")) + goto out_close; + + err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p1)")) + goto out_close; + + sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0); + if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)")) + goto out_close; + + recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT, + IO_TIMEOUT_SEC); + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") || + !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN), + "received data does not match the sent data")) + goto out_close; + + /* send again to ensure the stream is functioning correctly. */ + sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0); + if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)")) + goto out_close; + + /* partial read */ + off = STRP_PKT_FULL_LEN / 2; + recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT, + IO_TIMEOUT_SEC); + recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT, + IO_TIMEOUT_SEC); + + if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") || + !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN), + "partial received data does not match the sent data")) + goto out_close; + +out_close: + close(c0); + close(c1); + close(p0); + close(p1); +out: + test_sockmap_strp__destroy(strp); +} + +void test_sockmap_strp(void) +{ + if (test__start_subtest("sockmap strp tcp pass")) + test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false); + if (test__start_subtest("sockmap strp tcp v6 pass")) + test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false); + if (test__start_subtest("sockmap strp tcp pass fionread")) + test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true); + if (test__start_subtest("sockmap strp tcp v6 pass fionread")) + test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true); + if (test__start_subtest("sockmap strp tcp verdict")) + test_sockmap_strp_verdict(AF_INET, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp v6 verdict")) + test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp partial read")) + test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp multiple packets")) + test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM); + if (test__start_subtest("sockmap strp tcp dispatch")) + test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index c7f74f068e78..df27535995af 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -52,10 +52,10 @@ static void test_xdp_with_cpumap_helpers(void) ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); /* send a packet to trigger any potential bugs in there */ - char data[10] = {}; + char data[ETH_HLEN] = {}; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index 27ffed17d4be..461ab18705d5 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -23,7 +23,7 @@ static void test_xdp_with_devmap_helpers(void) __u32 len = sizeof(info); int err, dm_fd, dm_fd_redir, map_fd; struct nstoken *nstoken = NULL; - char data[10] = {}; + char data[ETH_HLEN] = {}; __u32 idx = 0; SYS(out_close, "ip netns add %s", TEST_NS); @@ -58,7 +58,7 @@ static void test_xdp_with_devmap_helpers(void) /* send a packet to trigger any potential bugs in there */ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); @@ -158,7 +158,7 @@ static void test_xdp_with_devmap_helpers_veth(void) struct nstoken *nstoken = NULL; __u32 len = sizeof(info); int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst; - char data[10] = {}; + char data[ETH_HLEN] = {}; __u32 idx = 0; SYS(out_close, "ip netns add %s", TEST_NS); @@ -208,7 +208,7 @@ static void test_xdp_with_devmap_helpers_veth(void) /* send a packet to trigger any potential bugs in there */ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = 10, + .data_size_in = sizeof(data), .flags = BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c new file mode 100644 index 000000000000..dde3d5bec515 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +int verdict_max_size = 10000; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 20); + __type(key, int); + __type(value, int); +} sock_map SEC(".maps"); + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict(struct __sk_buff *skb) +{ + __u32 one = 1; + + if (skb->len > verdict_max_size) + return SK_PASS; + + return bpf_sk_redirect_map(skb, &sock_map, one, 0); +} + +SEC("sk_skb/stream_verdict") +int prog_skb_verdict_pass(struct __sk_buff *skb) +{ + return SK_PASS; +} + +SEC("sk_skb/stream_parser") +int prog_skb_parser(struct __sk_buff *skb) +{ + return skb->len; +} + +SEC("sk_skb/stream_parser") +int prog_skb_parser_partial(struct __sk_buff *skb) +{ + /* agreement with the test program on a 4-byte size header + * and 6-byte body. + */ + if (skb->len < 4) { + /* need more header to determine full length */ + return 0; + } + /* return full length decoded from header. + * the return value may be larger than skb->len which + * means framework must wait body coming. + */ + return 10; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c index 29eb9568633f..0a187ff725cc 100644 --- a/tools/testing/selftests/bpf/progs/verifier_array_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c @@ -713,4 +713,19 @@ unsigned int non_stack_key_lookup(void) return val->index; } +SEC("socket") +__description("doesn't reject UINT64_MAX as s64 for irrelevant maps") +__success __retval(42) +unsigned int doesnt_reject_irrelevant_maps(void) +{ + __u64 key = 0xFFFFFFFFFFFFFFFF; + struct test_val *val; + + val = bpf_map_lookup_elem(&map_hash_48b, &key); + if (val) + return val->index; + + return 42; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh index 3f45512fb512..7406c24be1ac 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # # Test the special cpuset v1 hotplug case where a cpuset become empty of diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 394971b25c0b..873f5219e41d 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -2,17 +2,54 @@ # SPDX-License-Identifier: GPL-2.0 import errno +import os from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx -from lib.py import EthtoolFamily, NlError +from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv +from lib.py import defer, ethtool, ip -def get_hds(cfg, netnl) -> None: + +def _get_hds_mode(cfg, netnl) -> str: try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) except NlError as e: raise KsftSkipEx('ring-get not supported by device') if 'tcp-data-split' not in rings: raise KsftSkipEx('tcp-data-split not supported by device') + return rings['tcp-data-split'] + + +def _xdp_onoff(cfg): + test_dir = os.path.dirname(os.path.realpath(__file__)) + prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o" + ip("link set dev %s xdp obj %s sec xdp" % + (cfg.ifname, prog)) + ip("link set dev %s xdp off" % cfg.ifname) + + +def _ioctl_ringparam_modify(cfg, netnl) -> None: + """ + Helper for performing a hopefully unimportant IOCTL SET. + IOCTL does not support HDS, so it should not affect the HDS config. + """ + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + + if 'tx' not in rings: + raise KsftSkipEx('setting Tx ring size not supported') + + try: + ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] // 2}") + except CmdExitFailure as e: + ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] * 2}") + defer(ethtool, f"-G {cfg.ifname} tx {rings['tx']}") + + +def get_hds(cfg, netnl) -> None: + _get_hds_mode(cfg, netnl) + def get_hds_thresh(cfg, netnl) -> None: try: @@ -104,6 +141,103 @@ def set_hds_thresh_gt(cfg, netnl) -> None: netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt}) ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + +def set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "auto" / UNKNOWN mode, XDP installation should work. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _xdp_onoff(cfg) + + +def enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + +def set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "auto" / UNKNOWN mode, XDP installation should work. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _xdp_onoff(cfg) + + +def enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) # Trigger skip if not supported + + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + +def ioctl(cfg, netnl) -> None: + mode1 = _get_hds_mode(cfg, netnl) + _ioctl_ringparam_modify(cfg, netnl) + mode2 = _get_hds_mode(cfg, netnl) + + ksft_eq(mode1, mode2) + + +def ioctl_set_xdp(cfg, netnl) -> None: + """ + Like set_xdp(), but we perturb the settings via the legacy ioctl. + """ + mode = _get_hds_mode(cfg, netnl) + if mode == 'enabled': + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + _ioctl_ringparam_modify(cfg, netnl) + + _xdp_onoff(cfg) + + +def ioctl_enabled_set_xdp(cfg, netnl) -> None: + """ + Enable single-buffer XDP on the device. + When HDS is in "enabled" mode, XDP installation should not work. + """ + _get_hds_mode(cfg, netnl) # Trigger skip if not supported + + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled'}) + defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'unknown'}) + + with ksft_raises(CmdExitFailure) as e: + _xdp_onoff(cfg) + + def main() -> None: with NetDrvEnv(__file__, queue_count=3) as cfg: ksft_run([get_hds, @@ -112,7 +246,12 @@ def main() -> None: set_hds_enable, set_hds_thresh_zero, set_hds_thresh_max, - set_hds_thresh_gt], + set_hds_thresh_gt, + set_xdp, + enabled_set_xdp, + ioctl, + ioctl_set_xdp, + ioctl_enabled_set_xdp], args=(cfg, EthtoolFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ca8a7edff3dd..319aaa004c40 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -252,6 +252,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): try: # this targets queue 4, which doesn't exist ntuple2 = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") except CmdExitFailure: pass else: @@ -259,7 +260,13 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): # change the table to target queues 0 and 2 ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0") # ntuple rule therefore targets queues 1 and 3 - ntuple2 = ethtool_create(cfg, "-N", flow) + try: + ntuple2 = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + ksft_pr("Driver does not support rss + queue offset") + return + + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") # should replace existing filter ksft_eq(ntuple, ntuple2) _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3), diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 38303da957ee..8a518905a9f9 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -45,10 +45,9 @@ def addremove_queues(cfg, nl) -> None: netnl = EthtoolFamily() channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}}) - if channels['combined-count'] == 0: - rx_type = 'rx' - else: - rx_type = 'combined' + rx_type = 'rx' + if channels.get('combined-count', 0) > 0: + rx_type = 'combined' expected = curr_queues - 1 cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index 8eb6aa606a0d..46d289611ce8 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -383,6 +383,10 @@ static void test_statmount_mnt_point(void) return; } + if (!(sm->mask & STATMOUNT_MNT_POINT)) { + ksft_test_result_fail("missing STATMOUNT_MNT_POINT in mask\n"); + return; + } if (strcmp(sm->str + sm->mnt_point, "/") != 0) { ksft_test_result_fail("unexpected mount point: '%s' != '/'\n", sm->str + sm->mnt_point); @@ -408,6 +412,10 @@ static void test_statmount_mnt_root(void) strerror(errno)); return; } + if (!(sm->mask & STATMOUNT_MNT_ROOT)) { + ksft_test_result_fail("missing STATMOUNT_MNT_ROOT in mask\n"); + return; + } mnt_root = sm->str + sm->mnt_root; last_root = strrchr(mnt_root, '/'); if (last_root) @@ -437,6 +445,10 @@ static void test_statmount_fs_type(void) strerror(errno)); return; } + if (!(sm->mask & STATMOUNT_FS_TYPE)) { + ksft_test_result_fail("missing STATMOUNT_FS_TYPE in mask\n"); + return; + } fs_type = sm->str + sm->fs_type; for (s = known_fs; s != NULL; s++) { if (strcmp(fs_type, *s) == 0) @@ -464,6 +476,11 @@ static void test_statmount_mnt_opts(void) return; } + if (!(sm->mask & STATMOUNT_MNT_BASIC)) { + ksft_test_result_fail("missing STATMOUNT_MNT_BASIC in mask\n"); + return; + } + while (getline(&line, &len, f_mountinfo) != -1) { int i; char *p, *p2; @@ -514,7 +531,10 @@ static void test_statmount_mnt_opts(void) if (p2) *p2 = '\0'; - statmount_opts = sm->str + sm->mnt_opts; + if (sm->mask & STATMOUNT_MNT_OPTS) + statmount_opts = sm->str + sm->mnt_opts; + else + statmount_opts = ""; if (strcmp(statmount_opts, p) != 0) ksft_test_result_fail( "unexpected mount options: '%s' != '%s'\n", diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index dc25bcf4f9e2..73f6c6fcecab 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -7,12 +7,42 @@ echo 0 > events/enable echo > dynamic_events PLACE=$FUNCTION_FORK +PLACE2="kmem_cache_free" +PLACE3="schedule_timeout" + +# Some functions may have BPF programs attached, therefore +# count already enabled_functions before tests start +ocnt=`cat enabled_functions | wc -l` echo "f:myevent1 $PLACE" >> dynamic_events + +# Make sure the event is attached and is the only one +grep -q $PLACE enabled_functions +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $((ocnt + 1)) ]; then + exit_fail +fi + echo "f:myevent2 $PLACE%return" >> dynamic_events +# It should till be the only attached function +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $((ocnt + 1)) ]; then + exit_fail +fi + +# add another event +echo "f:myevent3 $PLACE2" >> dynamic_events + +grep -q $PLACE2 enabled_functions +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $((ocnt + 2)) ]; then + exit_fail +fi + grep -q myevent1 dynamic_events grep -q myevent2 dynamic_events +grep -q myevent3 dynamic_events test -d events/fprobes/myevent1 test -d events/fprobes/myevent2 @@ -21,6 +51,34 @@ echo "-:myevent2" >> dynamic_events grep -q myevent1 dynamic_events ! grep -q myevent2 dynamic_events +# should still have 2 left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $((ocnt + 2)) ]; then + exit_fail +fi + echo > dynamic_events +# Should have none left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $ocnt ]; then + exit_fail +fi + +echo "f:myevent4 $PLACE" >> dynamic_events + +# Should only have one enabled +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $((ocnt + 1)) ]; then + exit_fail +fi + +echo > dynamic_events + +# Should have none left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne $ocnt ]; then + exit_fail +fi + clear_trace diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 0336353bd15f..2839d2612ce3 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -43,10 +43,8 @@ TEST_GEN_PROGS = hid_bpf hidraw # $3 - target (assumed to be file); only file name will be emitted; # $4 - optional extra arg, emitted as-is, if provided. ifeq ($(V),1) -Q = msg = else -Q = @ msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; MAKEFLAGS += --no-print-directory submake_extras := feature_display=0 diff --git a/tools/testing/selftests/kvm/s390/cmma_test.c b/tools/testing/selftests/kvm/s390/cmma_test.c index e32dd59703a0..85cc8c18d6e7 100644 --- a/tools/testing/selftests/kvm/s390/cmma_test.c +++ b/tools/testing/selftests/kvm/s390/cmma_test.c @@ -444,7 +444,7 @@ static void assert_no_pages_cmma_dirty(struct kvm_vm *vm) ); } -static void test_get_inital_dirty(void) +static void test_get_initial_dirty(void) { struct kvm_vm *vm = create_vm_two_memslots(); struct kvm_vcpu *vcpu; @@ -651,7 +651,7 @@ struct testdef { } testlist[] = { { "migration mode and dirty tracking", test_migration_mode }, { "GET_CMMA_BITS: basic calls", test_get_cmma_basic }, - { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty }, + { "GET_CMMA_BITS: all pages are dirty initially", test_get_initial_dirty }, { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes }, }; diff --git a/tools/testing/selftests/kvm/s390/ucontrol_test.c b/tools/testing/selftests/kvm/s390/ucontrol_test.c index 135ee22856cf..d265b34c54be 100644 --- a/tools/testing/selftests/kvm/s390/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390/ucontrol_test.c @@ -88,10 +88,6 @@ asm("test_skey_asm:\n" " ahi %r0,1\n" " st %r1,0(%r5,%r6)\n" - " iske %r1,%r6\n" - " ahi %r0,1\n" - " diag 0,0,0x44\n" - " sske %r1,%r6\n" " xgr %r1,%r1\n" " iske %r1,%r6\n" @@ -459,10 +455,14 @@ TEST_F(uc_kvm, uc_no_user_region) }; ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion)); - ASSERT_EQ(EINVAL, errno); + ASSERT_TRUE(errno == EEXIST || errno == EINVAL) + TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION", + strerror(errno), errno); ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2)); - ASSERT_EQ(EINVAL, errno); + ASSERT_TRUE(errno == EEXIST || errno == EINVAL) + TH_LOG("errno %s (%i) not expected for ioctl KVM_SET_USER_MEMORY_REGION2", + strerror(errno), errno); } TEST_F(uc_kvm, uc_map_unmap) @@ -596,7 +596,9 @@ TEST_F(uc_kvm, uc_skey) ASSERT_EQ(true, uc_handle_exit(self)); ASSERT_EQ(1, sync_regs->gprs[0]); - /* ISKE */ + /* SSKE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; ASSERT_EQ(0, uc_run_once(self)); /* @@ -608,21 +610,11 @@ TEST_F(uc_kvm, uc_skey) TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)); TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); - TEST_REQUIRE(sie_block->ipa != 0xb229); + TEST_REQUIRE(sie_block->ipa != 0xb22b); - /* ISKE contd. */ + /* SSKE + ISKE contd. */ ASSERT_EQ(false, uc_handle_exit(self)); ASSERT_EQ(2, sync_regs->gprs[0]); - /* assert initial skey (ACC = 0, R & C = 1) */ - ASSERT_EQ(0x06, sync_regs->gprs[1]); - uc_assert_diag44(self); - - /* SSKE + ISKE */ - sync_regs->gprs[1] = skeyvalue; - run->kvm_dirty_regs |= KVM_SYNC_GPRS; - ASSERT_EQ(0, uc_run_once(self)); - ASSERT_EQ(false, uc_handle_exit(self)); - ASSERT_EQ(3, sync_regs->gprs[0]); ASSERT_EQ(skeyvalue, sync_regs->gprs[1]); uc_assert_diag44(self); @@ -631,7 +623,7 @@ TEST_F(uc_kvm, uc_skey) run->kvm_dirty_regs |= KVM_SYNC_GPRS; ASSERT_EQ(0, uc_run_once(self)); ASSERT_EQ(false, uc_handle_exit(self)); - ASSERT_EQ(4, sync_regs->gprs[0]); + ASSERT_EQ(3, sync_regs->gprs[0]); /* assert R reset but rest of skey unchanged */ ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]); ASSERT_EQ(0, sync_regs->gprs[1] & 0x04); diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c index 4f5881d4ef66..4e920705681a 100644 --- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c @@ -41,13 +41,19 @@ static bool smt_possible(void) return res; } -static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, - bool evmcs_expected) +static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected) { + const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip; + const struct kvm_cpuid2 *hv_cpuid_entries; int i; int nent_expected = 10; u32 test_val; + if (vcpu) + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); + else + hv_cpuid_entries = kvm_get_supported_hv_cpuid(); + TEST_ASSERT(hv_cpuid_entries->nent == nent_expected, "KVM_GET_SUPPORTED_HV_CPUID should return %d entries" " (returned %d)", @@ -80,12 +86,19 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, entry->eax, evmcs_expected ); break; + case 0x40000003: + TEST_ASSERT(has_irqchip || !(entry->edx & BIT(19)), + "\"Direct\" Synthetic Timers should require in-kernel APIC"); + break; case 0x40000004: test_val = entry->eax & (1UL << 18); TEST_ASSERT(!!test_val == !smt_possible(), "NoNonArchitecturalCoreSharing bit" " doesn't reflect SMT setting"); + + TEST_ASSERT(has_irqchip || !(entry->eax & BIT(10)), + "Cluster IPI (i.e. SEND_IPI) should require in-kernel APIC"); break; case 0x4000000A: TEST_ASSERT(entry->eax & (1UL << 19), @@ -109,9 +122,16 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, * entry->edx); */ } + + /* + * Note, the CPUID array returned by the system-scoped helper is a one- + * time allocation, i.e. must not be freed. + */ + if (vcpu) + free((void *)hv_cpuid_entries); } -void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { static struct kvm_cpuid2 cpuid = {.nent = 0}; int ret; @@ -129,19 +149,20 @@ void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) int main(int argc, char *argv[]) { struct kvm_vm *vm; - const struct kvm_cpuid2 *hv_cpuid_entries; struct kvm_vcpu *vcpu; TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); + /* Test the vCPU ioctl without an in-kernel local APIC. */ + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); + test_hv_cpuid(vcpu, false); + kvm_vm_free(vm); /* Test vCPU ioctl version */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); test_hv_cpuid_e2big(vm, vcpu); - - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, false); - free((void *)hv_cpuid_entries); + test_hv_cpuid(vcpu, false); if (!kvm_cpu_has(X86_FEATURE_VMX) || !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { @@ -149,9 +170,7 @@ int main(int argc, char *argv[]) goto do_sys; } vcpu_enable_evmcs(vcpu); - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); - test_hv_cpuid(hv_cpuid_entries, true); - free((void *)hv_cpuid_entries); + test_hv_cpuid(vcpu, true); do_sys: /* Test system ioctl version */ @@ -161,9 +180,7 @@ do_sys: } test_hv_cpuid_e2big(vm, NULL); - - hv_cpuid_entries = kvm_get_supported_hv_cpuid(); - test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX)); + test_hv_cpuid(NULL, kvm_cpu_has(X86_FEATURE_VMX)); out: kvm_vm_free(vm); diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore index 470203a7cd73..335b2b1a3463 100644 --- a/tools/testing/selftests/landlock/.gitignore +++ b/tools/testing/selftests/landlock/.gitignore @@ -1,2 +1,4 @@ /*_test +/sandbox-and-launch /true +/wait-pipe diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h index a604ea5d8297..6064c9ac0532 100644 --- a/tools/testing/selftests/landlock/common.h +++ b/tools/testing/selftests/landlock/common.h @@ -207,6 +207,7 @@ enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd) struct protocol_variant { int domain; int type; + int protocol; }; struct service_fixture { diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 29af19c4e9f9..425de4c20271 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -1,8 +1,11 @@ +CONFIG_AF_UNIX_OOB=y CONFIG_CGROUPS=y CONFIG_CGROUP_SCHED=y CONFIG_INET=y CONFIG_IPV6=y CONFIG_KEYS=y +CONFIG_MPTCP=y +CONFIG_MPTCP_IPV6=y CONFIG_NET=y CONFIG_NET_NS=y CONFIG_OVERLAY_FS=y diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c index 4e0aeb53b225..d9de0ee49ebc 100644 --- a/tools/testing/selftests/landlock/net_test.c +++ b/tools/testing/selftests/landlock/net_test.c @@ -85,18 +85,18 @@ static void setup_loopback(struct __test_metadata *const _metadata) clear_ambient_cap(_metadata, CAP_NET_ADMIN); } +static bool prot_is_tcp(const struct protocol_variant *const prot) +{ + return (prot->domain == AF_INET || prot->domain == AF_INET6) && + prot->type == SOCK_STREAM && + (prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP); +} + static bool is_restricted(const struct protocol_variant *const prot, const enum sandbox_type sandbox) { - switch (prot->domain) { - case AF_INET: - case AF_INET6: - switch (prot->type) { - case SOCK_STREAM: - return sandbox == TCP_SANDBOX; - } - break; - } + if (sandbox == TCP_SANDBOX) + return prot_is_tcp(prot); return false; } @@ -105,7 +105,7 @@ static int socket_variant(const struct service_fixture *const srv) int ret; ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC, - 0); + srv->protocol.protocol); if (ret < 0) return -errno; return ret; @@ -290,22 +290,70 @@ FIXTURE_TEARDOWN(protocol) } /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) { +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp1) { /* clang-format on */ .sandbox = NO_SANDBOX, .prot = { .domain = AF_INET, .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, }, }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) { +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_mptcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) { /* clang-format on */ .sandbox = NO_SANDBOX, .prot = { .domain = AF_INET6, .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_mptcp) { + /* clang-format on */ + .sandbox = NO_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, }, }; @@ -350,22 +398,70 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) { }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) { +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp1) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_mptcp) { /* clang-format on */ .sandbox = TCP_SANDBOX, .prot = { .domain = AF_INET, .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + /* IPPROTO_IP == 0 */ + .protocol = IPPROTO_IP, + }, +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) { + /* clang-format on */ + .sandbox = TCP_SANDBOX, + .prot = { + .domain = AF_INET6, + .type = SOCK_STREAM, + .protocol = IPPROTO_TCP, }, }; /* clang-format off */ -FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) { +FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_mptcp) { /* clang-format on */ .sandbox = TCP_SANDBOX, .prot = { .domain = AF_INET6, .type = SOCK_STREAM, + .protocol = IPPROTO_MPTCP, }, }; diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index e5d06fb40233..15601402dee6 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -306,7 +306,8 @@ function check_result { result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ - sed 's/^\[[ 0-9.]*\] //') + sed 's/^\[[ 0-9.]*\] //' | \ + sed 's/^\[[ ]*[CT][0-9]*\] //') if [[ "$expect" == "$result" ]] ; then echo "ok" diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 333c468c2699..da7e26668103 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -220,7 +220,7 @@ run_test() { if test_selected ${CATEGORY}; then # On memory constrainted systems some tests can fail to allocate hugepages. # perform some cleanup before the test for a higher success rate. - if [ ${CATEGORY} == "thp" ] | [ ${CATEGORY} == "hugetlb" ]; then + if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then echo 3 > /proc/sys/vm/drop_caches sleep 2 echo 1 > /proc/sys/vm/compact_memory diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh index 3885a2a91f7d..baed5e380dae 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh @@ -20,6 +20,7 @@ ALL_TESTS=" test_port_range_ipv4_tcp test_port_range_ipv6_udp test_port_range_ipv6_tcp + test_port_range_ipv4_udp_drop " NUM_NETIFS=4 @@ -194,6 +195,51 @@ test_port_range_ipv6_tcp() __test_port_range $proto $ip_proto $sip $dip $mode "$name" } +test_port_range_ipv4_udp_drop() +{ + local proto=ipv4 + local ip_proto=udp + local sip=192.0.2.1 + local dip=192.0.2.2 + local mode="-4" + local name="IPv4 UDP Drop" + local dmac=$(mac_get $h2) + local smac=$(mac_get $h1) + local sport_min=2000 + local sport_max=3000 + local sport_mid=$((sport_min + (sport_max - sport_min) / 2)) + local dport=5000 + + RET=0 + + tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \ + flower src_ip $sip dst_ip $dip ip_proto $ip_proto \ + src_port $sport_min-$sport_max \ + dst_port $dport \ + action drop + + # Test ports outside range - should pass + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_min - 1)),dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_max + 1)),dp=$dport" + + # Test ports inside range - should be dropped + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_mid,dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_max,dp=$dport" + + tc_check_packets "dev $swp1 ingress" 101 3 + check_err $? "Filter did not drop the expected number of packets" + + tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower + + log_test "Port range matching - $name" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index bc6b6762baf3..c22623b9a2a5 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -9,7 +9,10 @@ TEST_FILES := ../../../../../Documentation/netlink/specs TEST_FILES += ../../../../net/ynl TEST_GEN_FILES += csum +TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) include ../../lib.mk + +include ../bpf.mk diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c new file mode 100644 index 000000000000..d988b2e0cee8 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define KBUILD_MODNAME "xdp_dummy" +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("xdp") +int xdp_dummy_prog(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 414addef9a45..d240d02fa443 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1302,7 +1302,7 @@ again: return ret; if (cfg_truncate > 0) { - xdisconnect(fd); + shutdown(fd, SHUT_WR); } else if (--cfg_repeat > 0) { xdisconnect(fd); diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3f2fca02fec5..36ff28af4b19 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -103,6 +103,19 @@ struct testcase testcases_v4[] = { .r_num_mss = 1, }, { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V4, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4 + 2, + .tfail = true, + }, + { /* send a single MSS + 1B */ .tlen = CONST_MSS_V4 + 1, .gso_len = CONST_MSS_V4, @@ -206,6 +219,19 @@ struct testcase testcases_v6[] = { .r_num_mss = 1, }, { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V6, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6 + 2, + .tfail = true + }, + { /* send a single MSS + 1B */ .tlen = CONST_MSS_V6 + 1, .gso_len = CONST_MSS_V6, diff --git a/tools/testing/selftests/rseq/rseq-riscv-bits.h b/tools/testing/selftests/rseq/rseq-riscv-bits.h index de31a0143139..f02f411d550d 100644 --- a/tools/testing/selftests/rseq/rseq-riscv-bits.h +++ b/tools/testing/selftests/rseq/rseq-riscv-bits.h @@ -243,7 +243,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i #ifdef RSEQ_COMPARE_TWICE RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]") #endif - RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3) + RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, 3) RSEQ_INJECT_ASM(4) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ @@ -251,8 +251,8 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD), [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [ptr] "r" (ptr), - [off] "er" (off), - [inc] "er" (inc) + [off] "r" (off), + [inc] "r" (inc) RSEQ_INJECT_INPUT : "memory", RSEQ_ASM_TMP_REG_1 RSEQ_INJECT_CLOBBER diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h index 37e598d0a365..67d544aaa9a3 100644 --- a/tools/testing/selftests/rseq/rseq-riscv.h +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -158,7 +158,7 @@ do { \ "bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \ "333:\n" -#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \ +#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, post_commit_label) \ "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \ RSEQ_ASM_OP_R_ADD(off) \ REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \ diff --git a/tools/testing/selftests/sched_ext/create_dsq.c b/tools/testing/selftests/sched_ext/create_dsq.c index fa946d9146d4..d67431f57ac6 100644 --- a/tools/testing/selftests/sched_ext/create_dsq.c +++ b/tools/testing/selftests/sched_ext/create_dsq.c @@ -14,11 +14,11 @@ static enum scx_test_status setup(void **ctx) { struct create_dsq *skel; - skel = create_dsq__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = create_dsq__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(create_dsq__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c index e65d22f23f3b..b6d13496b24e 100644 --- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c +++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct ddsp_bogus_dsq_fail *skel; - skel = ddsp_bogus_dsq_fail__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = ddsp_bogus_dsq_fail__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(ddsp_bogus_dsq_fail__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c index abafee587cd6..af9ce4ee8baa 100644 --- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c +++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c @@ -14,8 +14,11 @@ static enum scx_test_status setup(void **ctx) { struct ddsp_vtimelocal_fail *skel; - skel = ddsp_vtimelocal_fail__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = ddsp_vtimelocal_fail__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(ddsp_vtimelocal_fail__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index fbda6bf54671..c02b2aa6fc64 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -43,7 +43,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev) if (!p) return; - if (p->nr_cpus_allowed == nr_cpus) + if (p->nr_cpus_allowed == nr_cpus && !is_migration_disabled(p)) target = bpf_get_prandom_u32() % nr_cpus; else target = scx_bpf_task_cpu(p); diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c index 0ff27e57fe43..e1f2ce4abfe6 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.c @@ -15,6 +15,7 @@ static enum scx_test_status setup(void **ctx) skel = dsp_local_on__open(); SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); skel->rodata->nr_cpus = libbpf_num_possible_cpus(); SCX_FAIL_IF(dsp_local_on__load(skel), "Failed to load skel"); diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c index 73e679953e27..d3387ae03679 100644 --- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c +++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct enq_last_no_enq_fails *skel; - skel = enq_last_no_enq_fails__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = enq_last_no_enq_fails__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(enq_last_no_enq_fails__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c index dd1350e5f002..a80e3a3b3698 100644 --- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c +++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct enq_select_cpu_fails *skel; - skel = enq_select_cpu_fails__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = enq_select_cpu_fails__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(enq_select_cpu_fails__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c index 31bcd06e21cd..9451782689de 100644 --- a/tools/testing/selftests/sched_ext/exit.c +++ b/tools/testing/selftests/sched_ext/exit.c @@ -23,6 +23,7 @@ static enum scx_test_status run(void *ctx) char buf[16]; skel = exit__open(); + SCX_ENUM_INIT(skel); skel->rodata->exit_point = tc; exit__load(skel); link = bpf_map__attach_struct_ops(skel->maps.exit_ops); diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c index 87bf220b1bce..1c9ceb661c43 100644 --- a/tools/testing/selftests/sched_ext/hotplug.c +++ b/tools/testing/selftests/sched_ext/hotplug.c @@ -49,8 +49,10 @@ static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined) SCX_ASSERT(is_cpu_online()); - skel = hotplug__open_and_load(); - SCX_ASSERT(skel); + skel = hotplug__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(hotplug__load(skel), "Failed to load skel"); /* Testing the offline -> online path, so go offline before starting */ if (onlining) diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c index 97d45f1e5597..eddf9e0e26e7 100644 --- a/tools/testing/selftests/sched_ext/init_enable_count.c +++ b/tools/testing/selftests/sched_ext/init_enable_count.c @@ -15,22 +15,6 @@ #define SCHED_EXT 7 -static struct init_enable_count * -open_load_prog(bool global) -{ - struct init_enable_count *skel; - - skel = init_enable_count__open(); - SCX_BUG_ON(!skel, "Failed to open skel"); - - if (!global) - skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL; - - SCX_BUG_ON(init_enable_count__load(skel), "Failed to load skel"); - - return skel; -} - static enum scx_test_status run_test(bool global) { struct init_enable_count *skel; @@ -40,7 +24,14 @@ static enum scx_test_status run_test(bool global) struct sched_param param = {}; pid_t pids[num_pre_forks]; - skel = open_load_prog(global); + skel = init_enable_count__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + + if (!global) + skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL; + + SCX_FAIL_IF(init_enable_count__load(skel), "Failed to load skel"); /* * Fork a bunch of children before we attach the scheduler so that we @@ -159,7 +150,7 @@ static enum scx_test_status run(void *ctx) struct scx_test init_enable_count = { .name = "init_enable_count", - .description = "Verify we do the correct amount of counting of init, " + .description = "Verify we correctly count the occurrences of init, " "enable, etc callbacks.", .run = run, }; diff --git a/tools/testing/selftests/sched_ext/maximal.c b/tools/testing/selftests/sched_ext/maximal.c index f38fc973c380..c6be50a9941d 100644 --- a/tools/testing/selftests/sched_ext/maximal.c +++ b/tools/testing/selftests/sched_ext/maximal.c @@ -14,8 +14,11 @@ static enum scx_test_status setup(void **ctx) { struct maximal *skel; - skel = maximal__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = maximal__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(maximal__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/maybe_null.c b/tools/testing/selftests/sched_ext/maybe_null.c index 31cfafb0cf65..aacf0c58ca4f 100644 --- a/tools/testing/selftests/sched_ext/maybe_null.c +++ b/tools/testing/selftests/sched_ext/maybe_null.c @@ -43,7 +43,7 @@ static enum scx_test_status run(void *ctx) struct scx_test maybe_null = { .name = "maybe_null", - .description = "Verify if PTR_MAYBE_NULL work for .dispatch", + .description = "Verify if PTR_MAYBE_NULL works for .dispatch", .run = run, }; REGISTER_SCX_TEST(&maybe_null) diff --git a/tools/testing/selftests/sched_ext/minimal.c b/tools/testing/selftests/sched_ext/minimal.c index 6c5db8ebbf8a..89f7261757ff 100644 --- a/tools/testing/selftests/sched_ext/minimal.c +++ b/tools/testing/selftests/sched_ext/minimal.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct minimal *skel; - skel = minimal__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = minimal__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(minimal__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/prog_run.c b/tools/testing/selftests/sched_ext/prog_run.c index 3cd57ef8daaa..05974820ca69 100644 --- a/tools/testing/selftests/sched_ext/prog_run.c +++ b/tools/testing/selftests/sched_ext/prog_run.c @@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct prog_run *skel; - skel = prog_run__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = prog_run__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(prog_run__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/reload_loop.c b/tools/testing/selftests/sched_ext/reload_loop.c index 5cfba2d6e056..308211d80436 100644 --- a/tools/testing/selftests/sched_ext/reload_loop.c +++ b/tools/testing/selftests/sched_ext/reload_loop.c @@ -18,11 +18,10 @@ bool force_exit = false; static enum scx_test_status setup(void **ctx) { - skel = maximal__open_and_load(); - if (!skel) { - SCX_ERR("Failed to open and load skel"); - return SCX_TEST_FAIL; - } + skel = maximal__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(maximal__load(skel), "Failed to load skel"); return SCX_TEST_PASS; } diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.c index a53a40c2d2f0..5b6e045e1109 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.c @@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dfl *skel; - skel = select_cpu_dfl__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dfl__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dfl__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c index 1d85bf4bf3a3..9b5d232efb7f 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c @@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dfl_nodispatch *skel; - skel = select_cpu_dfl_nodispatch__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dfl_nodispatch__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dfl_nodispatch__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c index 0309ca8785b3..80283dbc41b7 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c @@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dispatch *skel; - skel = select_cpu_dispatch__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dispatch__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dispatch__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c index 47eb6ed7627d..5e72ebbc90a5 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dispatch_bad_dsq *skel; - skel = select_cpu_dispatch_bad_dsq__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dispatch_bad_dsq__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dispatch_bad_dsq__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c index 48ff028a3c46..aa85949478bc 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_dispatch_dbl_dsp *skel; - skel = select_cpu_dispatch_dbl_dsp__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_dispatch_dbl_dsp__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_dispatch_dbl_dsp__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.c index b4629c2364f5..1e9b5c9bfff1 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_vtime.c +++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.c @@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx) { struct select_cpu_vtime *skel; - skel = select_cpu_vtime__open_and_load(); - SCX_FAIL_IF(!skel, "Failed to open and load skel"); + skel = select_cpu_vtime__open(); + SCX_FAIL_IF(!skel, "Failed to open"); + SCX_ENUM_INIT(skel); + SCX_FAIL_IF(select_cpu_vtime__load(skel), "Failed to load skel"); + *ctx = skel; return SCX_TEST_PASS; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 8c3a73461475..14ba51b52095 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -47,6 +47,7 @@ #include <linux/kcmp.h> #include <sys/resource.h> #include <sys/capability.h> +#include <linux/perf_event.h> #include <unistd.h> #include <sys/syscall.h> @@ -68,6 +69,10 @@ # define PR_SET_PTRACER 0x59616d61 #endif +#ifndef noinline +#define noinline __attribute__((noinline)) +#endif + #ifndef PR_SET_NO_NEW_PRIVS #define PR_SET_NO_NEW_PRIVS 38 #define PR_GET_NO_NEW_PRIVS 39 @@ -4888,6 +4893,200 @@ TEST(tsync_vs_dead_thread_leader) EXPECT_EQ(0, status); } +noinline int probed(void) +{ + return 1; +} + +static int parse_uint_from_file(const char *file, const char *fmt) +{ + int err = -1, ret; + FILE *f; + + f = fopen(file, "re"); + if (f) { + err = fscanf(f, fmt, &ret); + fclose(f); + } + return err == 1 ? ret : err; +} + +static int determine_uprobe_perf_type(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/type"; + + return parse_uint_from_file(file, "%d\n"); +} + +static int determine_uprobe_retprobe_bit(void) +{ + const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe"; + + return parse_uint_from_file(file, "config:%d\n"); +} + +static ssize_t get_uprobe_offset(const void *addr) +{ + size_t start, base, end; + bool found = false; + char buf[256]; + FILE *f; + + f = fopen("/proc/self/maps", "r"); + if (!f) + return -1; + + while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) { + if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) { + found = true; + break; + } + } + fclose(f); + return found ? (uintptr_t)addr - start + base : -1; +} + +FIXTURE(URETPROBE) { + int fd; +}; + +FIXTURE_VARIANT(URETPROBE) { + /* + * All of the URETPROBE behaviors can be tested with either + * uretprobe attached or not + */ + bool attach; +}; + +FIXTURE_VARIANT_ADD(URETPROBE, attached) { + .attach = true, +}; + +FIXTURE_VARIANT_ADD(URETPROBE, not_attached) { + .attach = false, +}; + +FIXTURE_SETUP(URETPROBE) +{ + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; + ssize_t offset; + int type, bit; + +#ifndef __NR_uretprobe + SKIP(return, "__NR_uretprobe syscall not defined"); +#endif + + if (!variant->attach) + return; + + memset(&attr, 0, attr_sz); + + type = determine_uprobe_perf_type(); + ASSERT_GE(type, 0); + bit = determine_uprobe_retprobe_bit(); + ASSERT_GE(bit, 0); + offset = get_uprobe_offset(probed); + ASSERT_GE(offset, 0); + + attr.config |= 1 << bit; + attr.size = attr_sz; + attr.type = type; + attr.config1 = ptr_to_u64("/proc/self/exe"); + attr.config2 = offset; + + self->fd = syscall(__NR_perf_event_open, &attr, + getpid() /* pid */, -1 /* cpu */, -1 /* group_fd */, + PERF_FLAG_FD_CLOEXEC); +} + +FIXTURE_TEARDOWN(URETPROBE) +{ + /* we could call close(self->fd), but we'd need extra filter for + * that and since we are calling _exit right away.. + */ +} + +static int run_probed_with_filter(struct sock_fprog *prog) +{ + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) || + seccomp(SECCOMP_SET_MODE_FILTER, 0, prog)) { + return -1; + } + + probed(); + return 0; +} + +TEST_F(URETPROBE, uretprobe_default_allow) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + +TEST_F(URETPROBE, uretprobe_default_block) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + +TEST_F(URETPROBE, uretprobe_block_uretprobe_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), +#ifdef __NR_uretprobe + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 0, 1), +#endif + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + +TEST_F(URETPROBE, uretprobe_default_block_with_uretprobe_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), +#ifdef __NR_uretprobe + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_uretprobe, 2, 0), +#endif + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit_group, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, run_probed_with_filter(&prog)); +} + /* * TODO: * - expand NNP testing diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index d3dd65b05b5f..9044ac054167 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -94,5 +94,37 @@ "$TC qdisc del dev $DUMMY ingress", "$IP addr del 10.10.10.10/24 dev $DUMMY" ] - } + }, + { + "id": "a4b9", + "name": "Test class qlen notification", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem", + "$TC qdisc add dev $DUMMY parent 2: handle 3: drr", + "$TC filter add dev $DUMMY parent 3: basic action drop", + "$TC class add dev $DUMMY parent 3: classid 3:1 drr", + "$TC class del dev $DUMMY classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC qdisc ls dev $DUMMY", + "matchPattern": "drr 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json index ae3d286a32b2..6f20d033670d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json @@ -313,6 +313,29 @@ "matchPattern": "qdisc bfifo 1: root", "matchCount": "0", "teardown": [ + ] + }, + { + "id": "d774", + "name": "Check pfifo_head_drop qdisc enqueue behaviour when limit == 0", + "category": [ + "qdisc", + "pfifo_head_drop" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: pfifo_head_drop limit 0", + "$IP link set dev $DUMMY up || true" + ], + "cmdUnderTest": "ping -c2 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "dropped 2", + "matchCount": "1", + "teardown": [ ] } ] diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index dfff8b288265..d0f6d253ac72 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1788,6 +1788,42 @@ static void test_stream_connect_retry_server(const struct test_opts *opts) close(fd); } +static void test_stream_linger_client(const struct test_opts *opts) +{ + struct linger optval = { + .l_onoff = 1, + .l_linger = 1 + }; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) { + perror("setsockopt(SO_LINGER)"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_linger_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1943,6 +1979,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_connect_retry_client, .run_server = test_stream_connect_retry_server, }, + { + .name = "SOCK_STREAM SO_LINGER null-ptr-deref", + .run_client = test_stream_linger_client, + .run_server = test_stream_linger_server, + }, {}, }; diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile index f2552f73a64c..056d212f25cf 100644 --- a/tools/thermal/lib/Makefile +++ b/tools/thermal/lib/Makefile @@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative) libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -ifeq ("$(origin V)", "command line") - VERBOSE = $(V) -endif -ifndef VERBOSE - VERBOSE = 0 -endif - -ifeq ($(VERBOSE),1) - Q = -else - Q = @ -endif - # Set compile option CFLAGS ifdef EXTRA_CFLAGS CFLAGS := $(EXTRA_CFLAGS) diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile index 6518b03e05c7..257a56b1899f 100644 --- a/tools/tracing/latency/Makefile +++ b/tools/tracing/latency/Makefile @@ -37,12 +37,6 @@ FEATURE_TESTS += libtracefs FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs -ifeq ($(V),1) - Q = -else - Q = @ -endif - all: $(LATENCY-COLLECTOR) include $(srctree)/tools/build/Makefile.include diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 8b5101457c70..0b61208db604 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -37,12 +37,6 @@ FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs FEATURE_DISPLAY += libcpupower -ifeq ($(V),1) - Q = -else - Q = @ -endif - all: $(RTLA) include $(srctree)/tools/build/Makefile.include diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile index 411d62b3d8eb..5b898360ba48 100644 --- a/tools/verification/rv/Makefile +++ b/tools/verification/rv/Makefile @@ -35,12 +35,6 @@ FEATURE_TESTS += libtracefs FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs -ifeq ($(V),1) - Q = -else - Q = @ -endif - all: $(RV) include $(srctree)/tools/build/Makefile.include diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index faf10671eed2..ba0327e2d0d3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1071,15 +1071,6 @@ out_err: } /* - * Called after the VM is otherwise initialized, but just before adding it to - * the vm_list. - */ -int __weak kvm_arch_post_init_vm(struct kvm *kvm) -{ - return 0; -} - -/* * Called just after removing the VM from the vm_list, but before doing any * other destruction. */ @@ -1199,10 +1190,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (r) goto out_err_no_debugfs; - r = kvm_arch_post_init_vm(kvm); - if (r) - goto out_err; - mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); mutex_unlock(&kvm_lock); @@ -1212,8 +1199,6 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) return kvm; -out_err: - kvm_destroy_vm_debugfs(kvm); out_err_no_debugfs: kvm_coalesced_mmio_free(kvm); out_no_coalesced_mmio: @@ -1971,7 +1956,15 @@ static int kvm_set_memory_region(struct kvm *kvm, return -EINVAL; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) return -EINVAL; - if ((mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) + + /* + * The size of userspace-defined memory regions is restricted in order + * to play nice with dirty bitmap operations, which are indexed with an + * "unsigned int". KVM's internal memory regions don't support dirty + * logging, and so are exempt. + */ + if (id < KVM_USER_MEM_SLOTS && + (mem->memory_size >> PAGE_SHIFT) > KVM_MEM_MAX_NR_PAGES) return -EINVAL; slots = __kvm_memslots(kvm, as_id); |