swiotlb: reduce the number of areas to match actual memory pool size

Although the desired size of the SWIOTLB memory pool is increased in swiotlb_adjust_nareas() to match the number of areas, the actual allocation may be smaller, which may require reducing the number of areas. For example, Xen uses swiotlb_init_late(), which in turn uses the page allocator. On x86, page size is 4 KiB and MAX_ORDER is 10 (1024 pages), resulting in a maximum memory pool size of 4 MiB. This corresponds to 2048 slots of 2 KiB each. The minimum area size is 128 (IO_TLB_SEGSIZE), allowing at most 2048 / 128 = 16 areas. If num_possible_cpus() is greater than the maximum number of areas, areas are smaller than IO_TLB_SEGSIZE and contiguous groups of free slots will span multiple areas. When allocating and freeing slots, only one area will be properly locked, causing race conditions on the unlocked slots and ultimately data corruption, kernel hangs and crashes. Fixes: 20347fca71a3 ("swiotlb: split up the global swiotlb lock") Signed-off-by: Petr Tesarik <petr.tesarik.ext@huawei.com> Reviewed-by: Roberto Sassu <roberto.sassu@huawei.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
author: Petr Tesarik <petr.tesarik.ext@huawei.com> 2023-06-26 15:01:04 +0200
committer: Christoph Hellwig <hch@lst.de> 2023-06-29 07:10:28 +0200
commit: 8ac04063354a01a484d2e55d20ed1958aa0d3392 (patch)
tree: d3746823d2d24d64d9a4deb3679110e1c751c8aa /kernel/dma
parent: aabd12609f91155f26584508b01f548215cc3c0c (diff)
1 files changed, 24 insertions, 3 deletions
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 89db590f931f..2b83e3ad9dca 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -138,6 +138,23 @@ static void swiotlb_adjust_nareas(unsigned int nareas)
 			(default_nslabs << IO_TLB_SHIFT) >> 20);
 }
 
+/**
+ * limit_nareas() - get the maximum number of areas for a given memory pool size
+ * @nareas:	Desired number of areas.
+ * @nslots:	Total number of slots in the memory pool.
+ *
+ * Limit the number of areas to the maximum possible number of areas in
+ * a memory pool of the given size.
+ *
+ * Return: Maximum possible number of areas.
+ */
+static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots)
+{
+	if (nslots < nareas * IO_TLB_SEGSIZE)
+		return nslots / IO_TLB_SEGSIZE;
+	return nareas;
+}
+
 static int __init
 setup_io_tlb_npages(char *str)
 {
@@ -297,6 +314,7 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
 {
 	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long nslabs;
+	unsigned int nareas;
 	size_t alloc_size;
 	void *tlb;
 
@@ -309,10 +327,12 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
 		swiotlb_adjust_nareas(num_possible_cpus());
 
 	nslabs = default_nslabs;
+	nareas = limit_nareas(default_nareas, nslabs);
 	while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) {
 		if (nslabs <= IO_TLB_MIN_SLABS)
 			return;
 		nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
+		nareas = limit_nareas(nareas, nslabs);
 	}
 
 	if (default_nslabs != nslabs) {
@@ -358,6 +378,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask,
 {
 	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+	unsigned int nareas;
 	unsigned char *vstart = NULL;
 	unsigned int order, area_order;
 	bool retried = false;
@@ -403,8 +424,8 @@ retry:
 			(PAGE_SIZE << order) >> 20);
 	}
 
-	area_order = get_order(array_size(sizeof(*mem->areas),
-		default_nareas));
+	nareas = limit_nareas(default_nareas, nslabs);
+	area_order = get_order(array_size(sizeof(*mem->areas), nareas));
 	mem->areas = (struct io_tlb_area *)
 		__get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order);
 	if (!mem->areas)
@@ -418,7 +439,7 @@ retry:
 	set_memory_decrypted((unsigned long)vstart,
 			     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
 	swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
-				default_nareas);
+				nareas);
 
 	swiotlb_print_info();
 	return 0;
author	Petr Tesarik <petr.tesarik.ext@huawei.com>	2023-06-26 15:01:04 +0200
committer	Christoph Hellwig <hch@lst.de>	2023-06-29 07:10:28 +0200
commit	8ac04063354a01a484d2e55d20ed1958aa0d3392 (patch)
tree	d3746823d2d24d64d9a4deb3679110e1c751c8aa /kernel/dma
parent	aabd12609f91155f26584508b01f548215cc3c0c (diff)