summaryrefslogtreecommitdiff
path: root/arch/powerpc/platforms/pseries/iommu.c
diff options
context:
space:
mode:
authorAlexey Kardashevskiy <aik@ozlabs.ru>2020-10-29 12:52:41 +1100
committerChristoph Hellwig <hch@lst.de>2020-11-27 10:33:42 +0100
commitbf6e2d562bbc4d115cf322b0bca57fe5bbd26f48 (patch)
tree45002ea26764949ddcf4e62e203ec0eb4f09b5c9 /arch/powerpc/platforms/pseries/iommu.c
parent8d8d53cf8fd028310b1189165b939cde124895d7 (diff)
powerpc/dma: Fallback to dma_ops when persistent memory present
So far we have been using huge DMA windows to map all the RAM available. The RAM is normally mapped to the VM address space contiguously, and there is always a reasonable upper limit for possible future hot plugged RAM which makes it easy to map all RAM via IOMMU. Now there is persistent memory ("ibm,pmemory" in the FDT) which (unlike normal RAM) can map anywhere in the VM space beyond the maximum RAM size and since it can be used for DMA, it requires extending the huge window up to MAX_PHYSMEM_BITS which requires hypervisor support for: 1. huge TCE tables; 2. multilevel TCE tables; 3. huge IOMMU pages. Certain hypervisors cannot do either so the only option left is restricting the huge DMA window to include only RAM and fallback to the default DMA window for persistent memory. This defines arch_dma_map_direct/etc to allow generic DMA code perform additional checks on whether direct DMA is still possible. This checks if the system has persistent memory. If it does not, the DMA bypass mode is selected, i.e. * dev->bus_dma_limit = 0 * dev->dma_ops_bypass = true <- this avoid calling dma_ops for mapping. If there is such memory, this creates identity mapping only for RAM and sets the dev->bus_dma_limit to let the generic code decide whether to call into the direct DMA or the indirect DMA ops. This should not change the existing behaviour when no persistent memory as dev->dma_ops_bypass is expected to be set. Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Acked-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'arch/powerpc/platforms/pseries/iommu.c')
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c51
1 files changed, 41 insertions, 10 deletions
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index e4198700ed1a..9fc5217f0c8e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -839,7 +839,7 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
np, ret);
}
-static u64 find_existing_ddw(struct device_node *pdn)
+static u64 find_existing_ddw(struct device_node *pdn, int *window_shift)
{
struct direct_window *window;
const struct dynamic_dma_window_prop *direct64;
@@ -851,6 +851,7 @@ static u64 find_existing_ddw(struct device_node *pdn)
if (window->device == pdn) {
direct64 = window->prop;
dma_addr = be64_to_cpu(direct64->dma_base);
+ *window_shift = be32_to_cpu(direct64->window_shift);
break;
}
}
@@ -1111,11 +1112,12 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
*/
static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
{
- int len, ret;
+ int len = 0, ret;
+ int max_ram_len = order_base_2(ddw_memory_hotplug_max());
struct ddw_query_response query;
struct ddw_create_response create;
int page_shift;
- u64 dma_addr, max_addr;
+ u64 dma_addr;
struct device_node *dn;
u32 ddw_avail[DDW_APPLICABLE_SIZE];
struct direct_window *window;
@@ -1123,10 +1125,15 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
struct dynamic_dma_window_prop *ddwprop;
struct failed_ddw_pdn *fpdn;
bool default_win_removed = false;
+ bool pmem_present;
+
+ dn = of_find_node_by_type(NULL, "ibm,pmemory");
+ pmem_present = dn != NULL;
+ of_node_put(dn);
mutex_lock(&direct_window_init_mutex);
- dma_addr = find_existing_ddw(pdn);
+ dma_addr = find_existing_ddw(pdn, &len);
if (dma_addr != 0)
goto out_unlock;
@@ -1212,14 +1219,29 @@ static u64 enable_ddw(struct pci_dev *dev, struct device_node *pdn)
}
/* verify the window * number of ptes will map the partition */
/* check largest block * page size > max memory hotplug addr */
- max_addr = ddw_memory_hotplug_max();
- if (query.largest_available_block < (max_addr >> page_shift)) {
- dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
- "%llu-sized pages\n", max_addr, query.largest_available_block,
- 1ULL << page_shift);
+ /*
+ * The "ibm,pmemory" can appear anywhere in the address space.
+ * Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
+ * for the upper limit and fallback to max RAM otherwise but this
+ * disables device::dma_ops_bypass.
+ */
+ len = max_ram_len;
+ if (pmem_present) {
+ if (query.largest_available_block >=
+ (1ULL << (MAX_PHYSMEM_BITS - page_shift)))
+ len = MAX_PHYSMEM_BITS - page_shift;
+ else
+ dev_info(&dev->dev, "Skipping ibm,pmemory");
+ }
+
+ if (query.largest_available_block < (1ULL << (len - page_shift))) {
+ dev_dbg(&dev->dev,
+ "can't map partition max 0x%llx with %llu %llu-sized pages\n",
+ 1ULL << len,
+ query.largest_available_block,
+ 1ULL << page_shift);
goto out_failed;
}
- len = order_base_2(max_addr);
win64 = kzalloc(sizeof(struct property), GFP_KERNEL);
if (!win64) {
dev_info(&dev->dev,
@@ -1299,6 +1321,15 @@ out_failed:
out_unlock:
mutex_unlock(&direct_window_init_mutex);
+
+ /*
+ * If we have persistent memory and the window size is only as big
+ * as RAM, then we failed to create a window to cover persistent
+ * memory and need to set the DMA limit.
+ */
+ if (pmem_present && dma_addr && (len == max_ram_len))
+ dev->dev.bus_dma_limit = dma_addr + (1ULL << len);
+
return dma_addr;
}