summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2022-12-13 09:05:19 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2022-12-13 09:05:19 -0800
commit e529d3507a93d3c9528580081bbaf931a50de154 (patch)
tree c363cf495fdbec199a8a9860d8e663389ae8cdd4
parent 6a24711d5c0bc8fb0fc49def433ab89ecbedf095 (diff)
parent ffcb754584603adf7039d7972564fbf6febdc542 (diff)
Merge tag 'dma-mapping-6.2-2022-12-13' of git://git.infradead.org/users/hch/dma-mapping
Pull dma-mapping updates from Christoph Hellwig:

 - reduce the swiotlb buffer size on allocation failure (Alexey Kardashevskiy)

 - clean up passing of bogus GFP flags to the dma-coherent allocator (Christoph Hellwig)

* tag 'dma-mapping-6.2-2022-12-13' of git://git.infradead.org/users/hch/dma-mapping:
  dma-mapping: reject __GFP_COMP in dma_alloc_attrs
  ALSA: memalloc: don't pass bogus GFP_ flags to dma_alloc_*
  s390/ism: don't pass bogus GFP_ flags to dma_alloc_coherent
  cnic: don't pass bogus GFP_ flags to dma_alloc_coherent
  RDMA/qib: don't pass bogus GFP_ flags to dma_alloc_coherent
  RDMA/hfi1: don't pass bogus GFP_ flags to dma_alloc_coherent
  media: videobuf-dma-contig: use dma_mmap_coherent
  swiotlb: reduce the swiotlb buffer size on allocation failure
-rw-r--r-- arch/arm/mm/dma-mapping.c 17
-rw-r--r-- drivers/infiniband/hw/hfi1/init.c 21
-rw-r--r-- drivers/infiniband/hw/qib/qib_iba6120.c 2
-rw-r--r-- drivers/infiniband/hw/qib/qib_init.c 21
-rw-r--r-- drivers/iommu/dma-iommu.c 3
-rw-r--r-- drivers/media/v4l2-core/videobuf-dma-contig.c 22
-rw-r--r-- drivers/net/ethernet/broadcom/cnic.c 6
-rw-r--r-- drivers/s390/net/ism_drv.c 3
-rw-r--r-- kernel/dma/mapping.c 8
-rw-r--r-- kernel/dma/swiotlb.c 63
-rw-r--r-- sound/core/memalloc.c 5
11 files changed, 69 insertions, 102 deletions
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index d7909091cf97..c135f6e37a00 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -564,14 +564,6 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
if (mask < 0xffffffffULL)
gfp |= GFP_DMA;
- /*
- * Following is a work-around (a.k.a. hack) to prevent pages
- * with __GFP_COMP being passed to split_page() which cannot
- * handle them. The real problem is that this flag probably
- * should be 0 on ARM as it is not supported on this
- * platform; see CONFIG_HUGETLBFS.
- */
- gfp &= ~(__GFP_COMP);
args.gfp = gfp;
*handle = DMA_MAPPING_ERROR;
@@ -1093,15 +1085,6 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
return __iommu_alloc_simple(dev, size, gfp, handle,
coherent_flag, attrs);
- /*
- * Following is a work-around (a.k.a. hack) to prevent pages
- * with __GFP_COMP being passed to split_page() which cannot
- * handle them. The real problem is that this flag probably
- * should be 0 on ARM as it is not supported on this
- * platform; see CONFIG_HUGETLBFS.
- */
- gfp &= ~(__GFP_COMP);
-
pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag);
if (!pages)
return NULL;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 436372b31431..24c0f0d257fc 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1761,17 +1761,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
unsigned amt;
if (!rcd->rcvhdrq) {
- gfp_t gfp_flags;
-
amt = rcvhdrq_size(rcd);
- if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
- gfp_flags = GFP_KERNEL;
- else
- gfp_flags = GFP_USER;
rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
&rcd->rcvhdrq_dma,
- gfp_flags | __GFP_COMP);
+ GFP_KERNEL);
if (!rcd->rcvhdrq) {
dd_dev_err(dd,
@@ -1785,7 +1779,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
PAGE_SIZE,
&rcd->rcvhdrqtailaddr_dma,
- gfp_flags);
+ GFP_KERNEL);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
}
@@ -1821,20 +1815,11 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
u32 max_entries, egrtop, alloced_bytes = 0;
- gfp_t gfp_flags;
u16 order, idx = 0;
int ret = 0;
u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
/*
- * GFP_USER, but without GFP_FS, so buffer cache can be
- * coalesced (we hope); otherwise, even at order 4,
- * heavy filesystem activity makes these fail, and we can
- * use compound pages.
- */
- gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
- /*
* The minimum size of the eager buffers is a groups of MTU-sized
* buffers.
* The global eager_buffer_size parameter is checked against the
@@ -1864,7 +1849,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
dma_alloc_coherent(&dd->pcidev->dev,
rcd->egrbufs.rcvtid_size,
&rcd->egrbufs.buffers[idx].dma,
- gfp_flags);
+ GFP_KERNEL);
if (rcd->egrbufs.buffers[idx].addr) {
rcd->egrbufs.buffers[idx].len =
rcd->egrbufs.rcvtid_size;
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index aea571943768..07386117f21a 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -2075,7 +2075,7 @@ static void alloc_dummy_hdrq(struct qib_devdata *dd)
dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev,
dd->rcd[0]->rcvhdrq_size,
&dd->cspec->dummy_hdrq_phys,
- GFP_ATOMIC | __GFP_COMP);
+ GFP_ATOMIC);
if (!dd->cspec->dummy_hdrq) {
qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n");
/* fallback to just 0'ing */
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 45211008449f..33667becd52b 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1546,18 +1546,14 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
if (!rcd->rcvhdrq) {
dma_addr_t phys_hdrqtail;
- gfp_t gfp_flags;
amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
sizeof(u32), PAGE_SIZE);
- gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
- GFP_USER : GFP_KERNEL;
old_node_id = dev_to_node(&dd->pcidev->dev);
set_dev_node(&dd->pcidev->dev, rcd->node_id);
- rcd->rcvhdrq = dma_alloc_coherent(
- &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
- gfp_flags | __GFP_COMP);
+ rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
+ &rcd->rcvhdrq_phys, GFP_KERNEL);
set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvhdrq) {
@@ -1577,7 +1573,7 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
set_dev_node(&dd->pcidev->dev, rcd->node_id);
rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
- gfp_flags);
+ GFP_KERNEL);
set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
@@ -1621,17 +1617,8 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
struct qib_devdata *dd = rcd->dd;
unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
size_t size;
- gfp_t gfp_flags;
int old_node_id;
- /*
- * GFP_USER, but without GFP_FS, so buffer cache can be
- * coalesced (we hope); otherwise, even at order 4,
- * heavy filesystem activity makes these fail, and we can
- * use compound pages.
- */
- gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
egrcnt = rcd->rcvegrcnt;
egroff = rcd->rcvegr_tid_base;
egrsize = dd->rcvegrbufsize;
@@ -1663,7 +1650,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
rcd->rcvegrbuf[e] =
dma_alloc_coherent(&dd->pcidev->dev, size,
&rcd->rcvegrbuf_phys[e],
- gfp_flags);
+ GFP_KERNEL);
set_dev_node(&dd->pcidev->dev, old_node_id);
if (!rcd->rcvegrbuf[e])
goto bail_rcvegrbuf_phys;
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9297b741f5e8..f798c44e0903 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -744,9 +744,6 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
/* IOMMU can map any pages, so himem can also be used here */
gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
- /* It makes no sense to muck about with huge pages */
- gfp &= ~__GFP_COMP;
-
while (count) {
struct page *page = NULL;
unsigned int order_size;
diff --git a/drivers/media/v4l2-core/videobuf-dma-contig.c b/drivers/media/v4l2-core/videobuf-dma-contig.c
index 52312ce2ba05..f2c439359557 100644
--- a/drivers/media/v4l2-core/videobuf-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf-dma-contig.c
@@ -36,12 +36,11 @@ struct videobuf_dma_contig_memory {
static int __videobuf_dc_alloc(struct device *dev,
struct videobuf_dma_contig_memory *mem,
- unsigned long size, gfp_t flags)
+ unsigned long size)
{
mem->size = size;
- mem->vaddr = dma_alloc_coherent(dev, mem->size,
- &mem->dma_handle, flags);
-
+ mem->vaddr = dma_alloc_coherent(dev, mem->size, &mem->dma_handle,
+ GFP_KERNEL);
if (!mem->vaddr) {
dev_err(dev, "memory alloc size %ld failed\n", mem->size);
return -ENOMEM;
@@ -258,8 +257,7 @@ static int __videobuf_iolock(struct videobuf_queue *q,
return videobuf_dma_contig_user_get(mem, vb);
/* allocate memory for the read() method */
- if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size),
- GFP_KERNEL))
+ if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size)))
return -ENOMEM;
break;
case V4L2_MEMORY_OVERLAY:
@@ -295,22 +293,18 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
BUG_ON(!mem);
MAGIC_CHECK(mem->magic, MAGIC_DC_MEM);
- if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize),
- GFP_KERNEL | __GFP_COMP))
+ if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize)))
goto error;
- /* Try to remap memory */
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
/* the "vm_pgoff" is just used in v4l2 to find the
* corresponding buffer data structure which is allocated
* earlier and it does not mean the offset from the physical
* buffer start address as usual. So set it to 0 to pass
- * the sanity check in vm_iomap_memory().
+ * the sanity check in dma_mmap_coherent().
*/
vma->vm_pgoff = 0;
-
- retval = vm_iomap_memory(vma, mem->dma_handle, mem->size);
+ retval = dma_mmap_coherent(q->dev, vma, mem->vaddr, mem->dma_handle,
+ mem->size);
if (retval) {
dev_err(q->dev, "mmap: remap failed with error %d. ",
retval);
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 74bc053a2078..7926aaef8f0c 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -1027,16 +1027,14 @@ static int __cnic_alloc_uio_rings(struct cnic_uio_dev *udev, int pages)
udev->l2_ring_size = pages * CNIC_PAGE_SIZE;
udev->l2_ring = dma_alloc_coherent(&udev->pdev->dev, udev->l2_ring_size,
- &udev->l2_ring_map,
- GFP_KERNEL | __GFP_COMP);
+ &udev->l2_ring_map, GFP_KERNEL);
if (!udev->l2_ring)
return -ENOMEM;
udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
udev->l2_buf_size = CNIC_PAGE_ALIGN(udev->l2_buf_size);
udev->l2_buf = dma_alloc_coherent(&udev->pdev->dev, udev->l2_buf_size,
- &udev->l2_buf_map,
- GFP_KERNEL | __GFP_COMP);
+ &udev->l2_buf_map, GFP_KERNEL);
if (!udev->l2_buf) {
__cnic_free_uio_rings(udev);
return -ENOMEM;
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index d34bb6ec1490..dfd401d9e362 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -243,7 +243,8 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct smcd_dmb *dmb)
dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
&dmb->dma_addr,
- GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC | __GFP_COMP | __GFP_NORETRY);
+ GFP_KERNEL | __GFP_NOWARN |
+ __GFP_NOMEMALLOC | __GFP_NORETRY);
if (!dmb->cpu_addr)
clear_bit(dmb->sba_idx, ism->sba_bitmap);
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 33437d620644..c026a5a5e046 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -498,6 +498,14 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
WARN_ON_ONCE(!dev->coherent_dma_mask);
+ /*
+ * DMA allocations can never be turned back into a page pointer, so
+ * requesting compound pages doesn't make sense (and can't even be
+ * supported at all by various backends).
+ */
+ if (WARN_ON_ONCE(flag & __GFP_COMP))
+ return NULL;
+
if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
return cpu_addr;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 339a990554e7..a34c38bbe28f 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -300,6 +300,37 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
return;
}
+static void *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags,
+ int (*remap)(void *tlb, unsigned long nslabs))
+{
+ size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
+ void *tlb;
+
+ /*
+ * By default allocate the bounce buffer memory from low memory, but
+ * allow to pick a location everywhere for hypervisors with guest
+ * memory encryption.
+ */
+ if (flags & SWIOTLB_ANY)
+ tlb = memblock_alloc(bytes, PAGE_SIZE);
+ else
+ tlb = memblock_alloc_low(bytes, PAGE_SIZE);
+
+ if (!tlb) {
+ pr_warn("%s: Failed to allocate %zu bytes tlb structure\n",
+ __func__, bytes);
+ return NULL;
+ }
+
+ if (remap && remap(tlb, nslabs) < 0) {
+ memblock_free(tlb, PAGE_ALIGN(bytes));
+ pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
+ return NULL;
+ }
+
+ return tlb;
+}
+
/*
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
@@ -310,7 +341,6 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
struct io_tlb_mem *mem = &io_tlb_default_mem;
unsigned long nslabs;
size_t alloc_size;
- size_t bytes;
void *tlb;
if (!addressing_limit && !swiotlb_force_bounce)
@@ -326,31 +356,16 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
swiotlb_adjust_nareas(num_possible_cpus());
nslabs = default_nslabs;
- /*
- * By default allocate the bounce buffer memory from low memory, but
- * allow to pick a location everywhere for hypervisors with guest
- * memory encryption.
- */
-retry:
- bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
- if (flags & SWIOTLB_ANY)
- tlb = memblock_alloc(bytes, PAGE_SIZE);
- else
- tlb = memblock_alloc_low(bytes, PAGE_SIZE);
- if (!tlb) {
- pr_warn("%s: failed to allocate tlb structure\n", __func__);
- return;
- }
-
- if (remap && remap(tlb, nslabs) < 0) {
- memblock_free(tlb, PAGE_ALIGN(bytes));
-
+ while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) {
+ if (nslabs <= IO_TLB_MIN_SLABS)
+ return;
nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
- if (nslabs >= IO_TLB_MIN_SLABS)
- goto retry;
+ }
- pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
- return;
+ if (default_nslabs != nslabs) {
+ pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs",
+ default_nslabs, nslabs);
+ default_nslabs = nslabs;
}
alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index ba095558b6d1..34250e6022ff 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -21,7 +21,6 @@
#define DEFAULT_GFP \
(GFP_KERNEL | \
- __GFP_COMP | /* compound page lets parts be mapped */ \
__GFP_RETRY_MAYFAIL | /* don't trigger OOM-killer */ \
__GFP_NOWARN) /* no stack trace print - this call is non-critical */
@@ -543,7 +542,7 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size)
void *p;
sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir,
- DEFAULT_GFP, 0);
+ DEFAULT_GFP | __GFP_COMP, 0);
#ifdef CONFIG_SND_DMA_SGBUF
if (!sgt && !get_dma_ops(dmab->dev.dev)) {
if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
@@ -811,7 +810,7 @@ static void *snd_dma_noncoherent_alloc(struct snd_dma_buffer *dmab, size_t size)
void *p;
p = dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr,
- dmab->dev.dir, DEFAULT_GFP);
+ dmab->dev.dir, DEFAULT_GFP | __GFP_COMP);
if (p)
dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->addr);
return p;