From dfcf2e017f5bb928094952d5d56d3566d3d07ba7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 25 Jan 2022 16:20:01 +0300 Subject: swiotlb: do not zero buffer in set_memory_decrypted() For larger TDX VM, memset() after set_memory_decrypted() in swiotlb_update_mem_attributes() takes substantial portion of boot time. Zeroing doesn't serve any functional purpose. Malicious VMM can mess with decrypted/shared buffer at any point. Remove the memset(). Signed-off-by: Kirill A. Shutemov Acked-by: Tom Lendacky Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index f1e7ea160b43..9390b38d2897 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -207,8 +207,6 @@ void __init swiotlb_update_mem_attributes(void) mem->vaddr = swiotlb_mem_remap(mem, bytes); if (!mem->vaddr) mem->vaddr = vaddr; - - memset(mem->vaddr, 0, bytes); } static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, -- cgit From 35265899acef135225e946b883fb07acba1d31a2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 24 Jan 2022 16:40:17 +0000 Subject: swiotlb: simplify debugfs setup Debugfs functions are already stubbed out for !CONFIG_DEBUG_FS, so we can remove most of the #ifdefs, just keeping one to manually optimise away the initcall when it would do nothing. We can also simplify the code itself by factoring out the directory creation and realising that the global io_tlb_default_mem now makes debugfs_dir redundant. Signed-off-by: Robin Murphy Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 40 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 9390b38d2897..f829259262fd 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -36,9 +36,7 @@ #include #include #include -#ifdef CONFIG_DEBUG_FS #include -#endif #ifdef CONFIG_DMA_RESTRICTED_POOL #include #include @@ -756,47 +754,29 @@ bool is_swiotlb_active(struct device *dev) } EXPORT_SYMBOL_GPL(is_swiotlb_active); -#ifdef CONFIG_DEBUG_FS -static struct dentry *debugfs_dir; - -static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem) +static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem, + const char *dirname) { + mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs); + if (!mem->nslabs) + return; + debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs); debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used); } -static int __init swiotlb_create_default_debugfs(void) +static int __init __maybe_unused swiotlb_create_default_debugfs(void) { - struct io_tlb_mem *mem = &io_tlb_default_mem; - - debugfs_dir = debugfs_create_dir("swiotlb", NULL); - if (mem->nslabs) { - mem->debugfs = debugfs_dir; - swiotlb_create_debugfs_files(mem); - } + swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb"); return 0; } +#ifdef CONFIG_DEBUG_FS late_initcall(swiotlb_create_default_debugfs); - #endif #ifdef CONFIG_DMA_RESTRICTED_POOL -#ifdef CONFIG_DEBUG_FS -static void rmem_swiotlb_debugfs_init(struct reserved_mem *rmem) -{ - struct io_tlb_mem *mem = rmem->priv; - - mem->debugfs = debugfs_create_dir(rmem->name, debugfs_dir); - swiotlb_create_debugfs_files(mem); -} -#else -static void rmem_swiotlb_debugfs_init(struct reserved_mem *rmem) -{ -} -#endif - struct page *swiotlb_alloc(struct device *dev, size_t size) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; @@ -858,7 +838,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, rmem->priv = mem; - rmem_swiotlb_debugfs_init(rmem); + swiotlb_create_debugfs_files(mem, rmem->name); } dev->dma_io_tlb_mem = mem; -- cgit From c0a4191c27a12d3175283fa33f16db20e91008fd Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 24 Jan 2022 16:40:18 +0000 Subject: swiotlb: tidy up includes SWIOTLB's includes have become a great big mess. Restore some order by consolidating the random different blocks, sorting alphabetically, and purging some clearly unnecessary entries - linux/io.h is now included unconditionally, so need not be duplicated in the restricted DMA pool case; similarly, linux/io.h subsumes asm/io.h; and by now it's a mystery why asm/dma.h was ever here at all. Signed-off-by: Robin Murphy Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index f829259262fd..f3ff0af49f81 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -21,38 +21,33 @@ #define pr_fmt(fmt) "software IO TLB: " fmt #include +#include +#include +#include #include #include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include #ifdef CONFIG_DMA_RESTRICTED_POOL -#include #include #include #include #include #endif -#include -#include - -#include -#include -#include -#include - #define CREATE_TRACE_POINTS #include -- cgit From 404f9373c4e5c943ed8a5e71c8dcfef9eddd54ab Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 24 Jan 2022 16:40:19 +0000 Subject: swiotlb: simplify array allocation Prefer kcalloc() to kzalloc(array_size()) for allocating an array. Signed-off-by: Robin Murphy Signed-off-by: Christoph Hellwig --- kernel/dma/swiotlb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index f3ff0af49f81..908eac2527cb 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -818,8 +818,7 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem, if (!mem) return -ENOMEM; - mem->slots = kzalloc(array_size(sizeof(*mem->slots), nslabs), - GFP_KERNEL); + mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL); if (!mem->slots) { kfree(mem); return -ENOMEM; -- cgit From e62c17f0455a74b182ce6373e2777817256afaa1 Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Tue, 8 Feb 2022 15:04:51 +0800 Subject: MAINTAINERS: update maintainer list of DMA MAPPING BENCHMARK Barry Song will not focus on this area, and Xiang Chen will continue his work to maintain this module. Signed-off-by: Xiang Chen Acked-by: Barry Song Signed-off-by: Christoph Hellwig --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index ea3e6c914384..48335022b0e4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5765,7 +5765,7 @@ F: include/linux/dma-map-ops.h F: kernel/dma/ DMA MAPPING BENCHMARK -M: Barry Song +M: Xiang Chen L: iommu@lists.linux-foundation.org F: kernel/dma/map_benchmark.c F: tools/testing/selftests/dma/ -- cgit From 06cc5cf16591c3b1d63af2bbc9d33a66419ced98 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 6 Jan 2022 22:46:52 +0100 Subject: alpha: Remove usage of the deprecated "pci-dma-compat.h" API In [1], Christoph Hellwig has proposed to remove the wrappers in include/linux/pci-dma-compat.h. Some reasons why this API should be removed have been given by Julia Lawall in [2]. A coccinelle script has been used to perform the needed transformation. It can be found in [3]. [1]: https://lore.kernel.org/kernel-janitors/20200421081257.GA131897@infradead.org/ [2]: https://lore.kernel.org/kernel-janitors/alpine.DEB.2.22.394.2007120902170.2424@hadrien/ [3]: https://lore.kernel.org/kernel-janitors/20200716192821.321233-1-christophe.jaillet@wanadoo.fr/ Signed-off-by: Christophe JAILLET Reviewed-by: Arnd Bergmann Signed-off-by: Christoph Hellwig --- arch/alpha/include/asm/floppy.h | 7 ++++--- arch/alpha/kernel/pci_iommu.c | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/alpha/include/asm/floppy.h b/arch/alpha/include/asm/floppy.h index 8dfdb3aa1d96..588758685439 100644 --- a/arch/alpha/include/asm/floppy.h +++ b/arch/alpha/include/asm/floppy.h @@ -43,17 +43,18 @@ alpha_fd_dma_setup(char *addr, unsigned long size, int mode, int io) static int prev_dir; int dir; - dir = (mode != DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE; + dir = (mode != DMA_MODE_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; if (bus_addr && (addr != prev_addr || size != prev_size || dir != prev_dir)) { /* different from last time -- unmap prev */ - pci_unmap_single(isa_bridge, bus_addr, prev_size, prev_dir); + dma_unmap_single(&isa_bridge->dev, bus_addr, prev_size, + prev_dir); bus_addr = 0; } if (!bus_addr) /* need to map it */ - bus_addr = pci_map_single(isa_bridge, addr, size, dir); + bus_addr = dma_map_single(&isa_bridge->dev, addr, size, dir); /* remember this one as prev */ prev_addr = addr; diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 21f9ac101324..e83a02ed5267 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -333,7 +333,7 @@ static dma_addr_t alpha_pci_map_page(struct device *dev, struct page *page, struct pci_dev *pdev = alpha_gendev_to_pci(dev); int dac_allowed; - BUG_ON(dir == PCI_DMA_NONE); + BUG_ON(dir == DMA_NONE); dac_allowed = pdev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0; return pci_map_single_1(pdev, (char *)page_address(page) + offset, @@ -356,7 +356,7 @@ static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr, struct pci_iommu_arena *arena; long dma_ofs, npages; - BUG_ON(dir == PCI_DMA_NONE); + BUG_ON(dir == DMA_NONE); if (dma_addr >= __direct_map_base && dma_addr < __direct_map_base + __direct_map_size) { @@ -460,7 +460,7 @@ static void alpha_pci_free_coherent(struct device *dev, size_t size, unsigned long attrs) { struct pci_dev *pdev = alpha_gendev_to_pci(dev); - pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL); + dma_unmap_single(&pdev->dev, dma_addr, size, DMA_BIDIRECTIONAL); free_pages((unsigned long)cpu_addr, get_order(size)); DBGA2("pci_free_consistent: [%llx,%zx] from %ps\n", @@ -639,7 +639,7 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, dma_addr_t max_dma; int dac_allowed; - BUG_ON(dir == PCI_DMA_NONE); + BUG_ON(dir == DMA_NONE); dac_allowed = dev ? pci_dac_dma_supported(pdev, pdev->dma_mask) : 0; @@ -702,7 +702,7 @@ static int alpha_pci_map_sg(struct device *dev, struct scatterlist *sg, /* Some allocation failed while mapping the scatterlist entries. Unmap them now. */ if (out > start) - pci_unmap_sg(pdev, start, out - start, dir); + dma_unmap_sg(&pdev->dev, start, out - start, dir); return -ENOMEM; } @@ -722,7 +722,7 @@ static void alpha_pci_unmap_sg(struct device *dev, struct scatterlist *sg, dma_addr_t max_dma; dma_addr_t fbeg, fend; - BUG_ON(dir == PCI_DMA_NONE); + BUG_ON(dir == DMA_NONE); if (! alpha_mv.mv_pci_tbi) return; -- cgit From ffecba83be9c7ced229b9f1d75643d5a49f820c4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 6 Jan 2022 22:51:05 +0100 Subject: agp/intel: Remove usage of the deprecated "pci-dma-compat.h" API In [1], Christoph Hellwig has proposed to remove the wrappers in include/linux/pci-dma-compat.h. Some reasons why this API should be removed have been given by Julia Lawall in [2]. A coccinelle script has been used to perform the needed transformation. It can be found in [3]. [1]: https://lore.kernel.org/kernel-janitors/20200421081257.GA131897@infradead.org/ [2]: https://lore.kernel.org/kernel-janitors/alpine.DEB.2.22.394.2007120902170.2424@hadrien/ [3]: https://lore.kernel.org/kernel-janitors/20200716192821.321233-1-christophe.jaillet@wanadoo.fr/ Signed-off-by: Christophe JAILLET Reviewed-by: Arnd Bergmann Signed-off-by: Christoph Hellwig --- drivers/char/agp/intel-gtt.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index c53cc9868cd8..79a1b65527c2 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -111,8 +111,8 @@ static int intel_gtt_map_memory(struct page **pages, for_each_sg(st->sgl, sg, num_entries, i) sg_set_page(sg, pages[i], PAGE_SIZE, 0); - if (!pci_map_sg(intel_private.pcidev, - st->sgl, st->nents, PCI_DMA_BIDIRECTIONAL)) + if (!dma_map_sg(&intel_private.pcidev->dev, st->sgl, st->nents, + DMA_BIDIRECTIONAL)) goto err; return 0; @@ -127,8 +127,8 @@ static void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg) struct sg_table st; DBG("try unmapping %lu pages\n", (unsigned long)mem->page_count); - pci_unmap_sg(intel_private.pcidev, sg_list, - num_sg, PCI_DMA_BIDIRECTIONAL); + dma_unmap_sg(&intel_private.pcidev->dev, sg_list, num_sg, + DMA_BIDIRECTIONAL); st.sgl = sg_list; st.orig_nents = st.nents = num_sg; @@ -303,9 +303,9 @@ static int intel_gtt_setup_scratch_page(void) set_pages_uc(page, 1); if (intel_private.needs_dmar) { - dma_addr = pci_map_page(intel_private.pcidev, page, 0, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(intel_private.pcidev, dma_addr)) { + dma_addr = dma_map_page(&intel_private.pcidev->dev, page, 0, + PAGE_SIZE, DMA_BIDIRECTIONAL); + if (dma_mapping_error(&intel_private.pcidev->dev, dma_addr)) { __free_page(page); return -EINVAL; } @@ -552,9 +552,9 @@ static void intel_gtt_teardown_scratch_page(void) { set_pages_wb(intel_private.scratch_page, 1); if (intel_private.needs_dmar) - pci_unmap_page(intel_private.pcidev, - intel_private.scratch_page_dma, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + dma_unmap_page(&intel_private.pcidev->dev, + intel_private.scratch_page_dma, PAGE_SIZE, + DMA_BIDIRECTIONAL); __free_page(intel_private.scratch_page); } @@ -1412,13 +1412,13 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev, if (bridge) { mask = intel_private.driver->dma_mask_size; - if (pci_set_dma_mask(intel_private.pcidev, DMA_BIT_MASK(mask))) + if (dma_set_mask(&intel_private.pcidev->dev, DMA_BIT_MASK(mask))) dev_err(&intel_private.pcidev->dev, "set gfx device dma mask %d-bit failed!\n", mask); else - pci_set_consistent_dma_mask(intel_private.pcidev, - DMA_BIT_MASK(mask)); + dma_set_coherent_mask(&intel_private.pcidev->dev, + DMA_BIT_MASK(mask)); } if (intel_gtt_init() != 0) { -- cgit From 0fb3436b4b36cf69f4544385aa2bb8c5a4913509 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 6 Jan 2022 22:51:38 +0100 Subject: sparc: Remove usage of the deprecated "pci-dma-compat.h" API In [1], Christoph Hellwig has proposed to remove the wrappers in include/linux/pci-dma-compat.h. Some reasons why this API should be removed have been given by Julia Lawall in [2]. A coccinelle script has been used to perform the needed transformation. It can be found in [3]. [1]: https://lore.kernel.org/kernel-janitors/20200421081257.GA131897@infradead.org/ [2]: https://lore.kernel.org/kernel-janitors/alpine.DEB.2.22.394.2007120902170.2424@hadrien/ [3]: https://lore.kernel.org/kernel-janitors/20200716192821.321233-1-christophe.jaillet@wanadoo.fr/ Signed-off-by: Christophe JAILLET Reviewed-by: Arnd Bergmann Acked-by: David S. Miller Signed-off-by: Christoph Hellwig --- arch/sparc/kernel/ioport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 57a72c46eddb..4e4f3d3263e4 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -309,7 +309,7 @@ arch_initcall(sparc_register_ioport); void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - if (dir != PCI_DMA_TODEVICE && + if (dir != DMA_TO_DEVICE && sparc_cpu_model == sparc_leon && !sparc_leon3_snooping_enabled()) leon_flush_dcache_all(); -- cgit From 8c155674d9757be855547dc4eb6bcb82d52482e7 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 6 Jan 2022 22:52:46 +0100 Subject: rapidio/tsi721: Remove usage of the deprecated "pci-dma-compat.h" API In [1], Christoph Hellwig has proposed to remove the wrappers in include/linux/pci-dma-compat.h. Some reasons why this API should be removed have been given by Julia Lawall in [2]. A coccinelle script has been used to perform the needed transformation. It can be found in [3]. [1]: https://lore.kernel.org/kernel-janitors/20200421081257.GA131897@infradead.org/ [2]: https://lore.kernel.org/kernel-janitors/alpine.DEB.2.22.394.2007120902170.2424@hadrien/ [3]: https://lore.kernel.org/kernel-janitors/20200716192821.321233-1-christophe.jaillet@wanadoo.fr/ Signed-off-by: Christophe JAILLET Reviewed-by: Arnd Bergmann Signed-off-by: Christoph Hellwig --- drivers/rapidio/devices/tsi721.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/rapidio/devices/tsi721.c b/drivers/rapidio/devices/tsi721.c index 4dd31dd9feea..b3134744fb55 100644 --- a/drivers/rapidio/devices/tsi721.c +++ b/drivers/rapidio/devices/tsi721.c @@ -2836,17 +2836,17 @@ static int tsi721_probe(struct pci_dev *pdev, } /* Configure DMA attributes. */ - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64))) { + err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) { tsi_err(&pdev->dev, "Unable to set DMA mask"); goto err_unmap_bars; } - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) + if (dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) tsi_info(&pdev->dev, "Unable to set consistent DMA mask"); } else { - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64)); if (err) tsi_info(&pdev->dev, "Unable to set consistent DMA mask"); } -- cgit From fba09099c6e506608e05e08ac717bf34501f821b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 6 Jan 2022 22:53:11 +0100 Subject: media: v4l2-pci-skeleton: Remove usage of the deprecated "pci-dma-compat.h" API In [1], Christoph Hellwig has proposed to remove the wrappers in include/linux/pci-dma-compat.h. Some reasons why this API should be removed have been given by Julia Lawall in [2]. A coccinelle script has been used to perform the needed transformation. It can be found in [3]. [1]: https://lore.kernel.org/kernel-janitors/20200421081257.GA131897@infradead.org/ [2]: https://lore.kernel.org/kernel-janitors/alpine.DEB.2.22.394.2007120902170.2424@hadrien/ [3]: https://lore.kernel.org/kernel-janitors/20200716192821.321233-1-christophe.jaillet@wanadoo.fr/ Signed-off-by: Christophe JAILLET Reviewed-by: Arnd Bergmann Signed-off-by: Christoph Hellwig --- samples/v4l/v4l2-pci-skeleton.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/v4l/v4l2-pci-skeleton.c b/samples/v4l/v4l2-pci-skeleton.c index 3fa6582b4a68..6311b7465220 100644 --- a/samples/v4l/v4l2-pci-skeleton.c +++ b/samples/v4l/v4l2-pci-skeleton.c @@ -766,7 +766,7 @@ static int skeleton_probe(struct pci_dev *pdev, const struct pci_device_id *ent) ret = pci_enable_device(pdev); if (ret) return ret; - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { dev_err(&pdev->dev, "no suitable DMA available.\n"); goto disable_pci; -- cgit From f5ff79fddf0efecca538046b5cc20fb3ded2ec4f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 26 Feb 2022 16:40:21 +0100 Subject: dma-mapping: remove CONFIG_DMA_REMAP CONFIG_DMA_REMAP is used to build a few helpers around the core vmalloc code, and to use them in case there is a highmem page in dma-direct, and to make dma coherent allocations be able to use non-contiguous pages allocations for DMA allocations in the dma-iommu layer. Right now it needs to be explicitly selected by architectures, and is only done so by architectures that require remapping to deal with devices that are not DMA coherent. Make it unconditional for builds with CONFIG_MMU as it is very little extra code, but makes it much more likely that large DMA allocations succeed on x86. This fixes hot plugging a NVMe thunderbolt SSD for me, which tries to allocate a 1MB buffer that is otherwise hard to obtain due to memory fragmentation on a heavily used laptop. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- arch/arm/Kconfig | 2 +- arch/xtensa/Kconfig | 2 +- drivers/iommu/dma-iommu.c | 14 +++++--------- kernel/dma/Kconfig | 7 +------ kernel/dma/Makefile | 2 +- kernel/dma/direct.c | 18 +++++++----------- 6 files changed, 16 insertions(+), 29 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fabe39169b12..11fbf66ef8f8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -47,7 +47,7 @@ config ARM select DMA_DECLARE_COHERENT select DMA_GLOBAL_POOL if !MMU select DMA_OPS - select DMA_REMAP if MMU + select DMA_NONCOHERENT_MMAP if MMU select EDAC_SUPPORT select EDAC_ATOMIC_SCRUB select GENERIC_ALLOCATOR diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 8ac599aa6d99..76438ee313d1 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -17,7 +17,7 @@ config XTENSA select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select DMA_REMAP if MMU + select DMA_NONCOHERENT_MMAP if MMU select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d85d54f2b549..cebced7ddf39 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -852,7 +852,6 @@ out_unmap: return NULL; } -#ifdef CONFIG_DMA_REMAP static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) @@ -882,7 +881,6 @@ static void iommu_dma_free_noncontiguous(struct device *dev, size_t size, sg_free_table(&sh->sgt); kfree(sh); } -#endif /* CONFIG_DMA_REMAP */ static void iommu_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) @@ -1276,7 +1274,7 @@ static void __iommu_dma_free(struct device *dev, size_t size, void *cpu_addr) dma_free_from_pool(dev, cpu_addr, alloc_size)) return; - if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { + if (is_vmalloc_addr(cpu_addr)) { /* * If it the address is remapped, then it's either non-coherent * or highmem CMA, or an iommu_dma_alloc_remap() construction. @@ -1318,7 +1316,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size, if (!page) return NULL; - if (IS_ENABLED(CONFIG_DMA_REMAP) && (!coherent || PageHighMem(page))) { + if (!coherent || PageHighMem(page)) { pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); cpu_addr = dma_common_contiguous_remap(page, alloc_size, @@ -1350,7 +1348,7 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, gfp |= __GFP_ZERO; - if (IS_ENABLED(CONFIG_DMA_REMAP) && gfpflags_allow_blocking(gfp) && + if (gfpflags_allow_blocking(gfp) && !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { return iommu_dma_alloc_remap(dev, size, handle, gfp, dma_pgprot(dev, PAGE_KERNEL, attrs), attrs); @@ -1391,7 +1389,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, if (off >= nr_pages || vma_pages(vma) > nr_pages - off) return -ENXIO; - if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { + if (is_vmalloc_addr(cpu_addr)) { struct page **pages = dma_common_find_pages(cpu_addr); if (pages) @@ -1413,7 +1411,7 @@ static int iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, struct page *page; int ret; - if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { + if (is_vmalloc_addr(cpu_addr)) { struct page **pages = dma_common_find_pages(cpu_addr); if (pages) { @@ -1445,10 +1443,8 @@ static const struct dma_map_ops iommu_dma_ops = { .free = iommu_dma_free, .alloc_pages = dma_common_alloc_pages, .free_pages = dma_common_free_pages, -#ifdef CONFIG_DMA_REMAP .alloc_noncontiguous = iommu_dma_alloc_noncontiguous, .free_noncontiguous = iommu_dma_free_noncontiguous, -#endif .mmap = iommu_dma_mmap, .get_sgtable = iommu_dma_get_sgtable, .map_page = iommu_dma_map_page, diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 1b02179758cb..56866aaa2ae1 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -110,15 +110,10 @@ config DMA_GLOBAL_POOL select DMA_DECLARE_COHERENT bool -config DMA_REMAP - bool - depends on MMU - select DMA_NONCOHERENT_MMAP - config DMA_DIRECT_REMAP bool - select DMA_REMAP select DMA_COHERENT_POOL + select DMA_NONCOHERENT_MMAP config DMA_CMA bool "DMA Contiguous Memory Allocator" diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 0dd65ec1d234..21926e46ef4f 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -8,5 +8,5 @@ obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o obj-$(CONFIG_DMA_API_DEBUG) += debug.o obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o -obj-$(CONFIG_DMA_REMAP) += remap.o +obj-$(CONFIG_MMU) += remap.o obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 50f48e9e4598..35a1d29d6a2e 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -265,17 +265,13 @@ void *dma_direct_alloc(struct device *dev, size_t size, page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); if (!page) return NULL; + + /* + * dma_alloc_contiguous can return highmem pages depending on a + * combination the cma= arguments and per-arch setup. These need to be + * remapped to return a kernel virtual address. + */ if (PageHighMem(page)) { - /* - * Depending on the cma= arguments and per-arch setup, - * dma_alloc_contiguous could return highmem pages. - * Without remapping there is no way to return them here, so - * log an error and fail. - */ - if (!IS_ENABLED(CONFIG_DMA_REMAP)) { - dev_info(dev, "Rejecting highmem page from CMA.\n"); - goto out_free_pages; - } remap = true; set_uncached = false; } @@ -349,7 +345,7 @@ void dma_direct_free(struct device *dev, size_t size, dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) return; - if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) { + if (is_vmalloc_addr(cpu_addr)) { vunmap(cpu_addr); } else { if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED)) -- cgit From 80e4390981618e290616dbd06ea190d4576f219d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 28 Feb 2022 14:04:53 -0800 Subject: dma-debug: fix return value of __setup handlers When valid kernel command line parameters dma_debug=off dma_debug_entries=100 are used, they are reported as Unknown parameters and added to init's environment strings, polluting it. Unknown kernel command line parameters "BOOT_IMAGE=/boot/bzImage-517rc5 dma_debug=off dma_debug_entries=100", will be passed to user space. and Run /sbin/init as init process with arguments: /sbin/init with environment: HOME=/ TERM=linux BOOT_IMAGE=/boot/bzImage-517rc5 dma_debug=off dma_debug_entries=100 Return 1 from these __setup handlers to indicate that the command line option has been handled. Fixes: 59d3daafa1726 ("dma-debug: add kernel command line parameters") Signed-off-by: Randy Dunlap Reported-by: Igor Zhbanov Link: lore.kernel.org/r/64644a2f-4a20-bab3-1e15-3b2cdd0defe3@omprussia.ru Cc: Joerg Roedel Cc: Christoph Hellwig Cc: Marek Szyprowski Cc: iommu@lists.linux-foundation.org Cc: Robin Murphy Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 7a14ca29c377..f8ff598596b8 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -927,7 +927,7 @@ static __init int dma_debug_cmdline(char *str) global_disable = true; } - return 0; + return 1; } static __init int dma_debug_entries_cmdline(char *str) @@ -936,7 +936,7 @@ static __init int dma_debug_entries_cmdline(char *str) return -EINVAL; if (!get_option(&str, &nr_prealloc_entries)) nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - return 0; + return 1; } __setup("dma_debug=", dma_debug_cmdline); -- cgit From 8ddde07a3d285a0f3cec14924446608320fdc013 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Tue, 8 Mar 2022 16:59:10 +0800 Subject: dma-mapping: benchmark: extract a common header file for map_benchmark definition kernel/dma/map_benchmark.c and selftests/dma/dma_map_benchmark.c have duplicate map_benchmark definitions, which tends to lead to inconsistent changes to map_benchmark on both sides, extract a common header file to avoid this problem. Signed-off-by: Tian Tao Acked-by: Barry Song Reviewed-by: Shuah Khan Signed-off-by: Christoph Hellwig --- include/linux/map_benchmark.h | 31 +++++++++++++++++++++++++ kernel/dma/map_benchmark.c | 25 +------------------- tools/testing/selftests/dma/dma_map_benchmark.c | 25 +------------------- 3 files changed, 33 insertions(+), 48 deletions(-) create mode 100644 include/linux/map_benchmark.h diff --git a/include/linux/map_benchmark.h b/include/linux/map_benchmark.h new file mode 100644 index 000000000000..62674c83bde4 --- /dev/null +++ b/include/linux/map_benchmark.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 HiSilicon Limited. + */ + +#ifndef _KERNEL_DMA_BENCHMARK_H +#define _KERNEL_DMA_BENCHMARK_H + +#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) +#define DMA_MAP_MAX_THREADS 1024 +#define DMA_MAP_MAX_SECONDS 300 +#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) + +#define DMA_MAP_BIDIRECTIONAL 0 +#define DMA_MAP_TO_DEVICE 1 +#define DMA_MAP_FROM_DEVICE 2 + +struct map_benchmark { + __u64 avg_map_100ns; /* average map latency in 100ns */ + __u64 map_stddev; /* standard deviation of map latency */ + __u64 avg_unmap_100ns; /* as above */ + __u64 unmap_stddev; + __u32 threads; /* how many threads will do map/unmap in parallel */ + __u32 seconds; /* how long the test will last */ + __s32 node; /* which numa node this benchmark will run on */ + __u32 dma_bits; /* DMA addressing capability */ + __u32 dma_dir; /* DMA data direction */ + __u32 dma_trans_ns; /* time for DMA transmission in ns */ + __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ +}; +#endif /* _KERNEL_DMA_BENCHMARK_H */ diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 9b9af1bd6be3..0520a8f4fb1d 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,30 +19,6 @@ #include #include -#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) -#define DMA_MAP_MAX_THREADS 1024 -#define DMA_MAP_MAX_SECONDS 300 -#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) - -#define DMA_MAP_BIDIRECTIONAL 0 -#define DMA_MAP_TO_DEVICE 1 -#define DMA_MAP_FROM_DEVICE 2 - -struct map_benchmark { - __u64 avg_map_100ns; /* average map latency in 100ns */ - __u64 map_stddev; /* standard deviation of map latency */ - __u64 avg_unmap_100ns; /* as above */ - __u64 unmap_stddev; - __u32 threads; /* how many threads will do map/unmap in parallel */ - __u32 seconds; /* how long the test will last */ - __s32 node; /* which numa node this benchmark will run on */ - __u32 dma_bits; /* DMA addressing capability */ - __u32 dma_dir; /* DMA data direction */ - __u32 dma_trans_ns; /* time for DMA transmission in ns */ - __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ - __u8 expansion[76]; /* For future use */ -}; - struct map_benchmark_data { struct map_benchmark bparam; struct device *dev; diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index 485dff51bad2..c3b3c09e995e 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -10,40 +10,17 @@ #include #include #include +#include #include #define NSEC_PER_MSEC 1000000L -#define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) -#define DMA_MAP_MAX_THREADS 1024 -#define DMA_MAP_MAX_SECONDS 300 -#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) - -#define DMA_MAP_BIDIRECTIONAL 0 -#define DMA_MAP_TO_DEVICE 1 -#define DMA_MAP_FROM_DEVICE 2 - static char *directions[] = { "BIDIRECTIONAL", "TO_DEVICE", "FROM_DEVICE", }; -struct map_benchmark { - __u64 avg_map_100ns; /* average map latency in 100ns */ - __u64 map_stddev; /* standard deviation of map latency */ - __u64 avg_unmap_100ns; /* as above */ - __u64 unmap_stddev; - __u32 threads; /* how many threads will do map/unmap in parallel */ - __u32 seconds; /* how long the test will last */ - __s32 node; /* which numa node this benchmark will run on */ - __u32 dma_bits; /* DMA addressing capability */ - __u32 dma_dir; /* DMA data direction */ - __u32 dma_trans_ns; /* time for DMA transmission in ns */ - __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ - __u8 expansion[76]; /* For future use */ -}; - int main(int argc, char **argv) { struct map_benchmark map; -- cgit