From dd1ea5763e65c69983697dc97ea9d6e3cb9df922 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Fri, 10 Nov 2017 19:52:29 +0200
Subject: powerpc/pci: Use for_each_pci_bridge() helper

Use for_each_pci_bridge() helper to make the code slightly cleaner.
No functional change intended.

Requires: 24a0c654d7d6 ("PCI: Add for_each_pci_bridge() helper")

Signed-off-by: Andy Shevchenko
Signed-off-by: Bjorn Helgaas
Acked-by: Michael Ellerman
---
 arch/powerpc/kernel/pci-hotplug.c | 7 ++-----
 arch/powerpc/kernel/pci_of_scan.c | 7 ++-----
 2 files changed, 4 insertions(+), 10 deletions(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 2d71269e7dc1..741f47295188 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -134,11 +134,8 @@ void pci_hp_add_devices(struct pci_bus *bus)
 		pcibios_setup_bus_devices(bus);
 		max = bus->busn_res.start;
 		for (pass = 0; pass < 2; pass++) {
-			list_for_each_entry(dev, &bus->devices, bus_list) {
-				if (pci_is_bridge(dev))
-					max = pci_scan_bridge(bus, dev,
-							      max, pass);
-			}
+			for_each_pci_bridge(dev, bus)
+				max = pci_scan_bridge(bus, dev, max, pass);
 		}
 	}
 	pcibios_finish_adding_to_bus(bus);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 0d790f8432d2..8bdaa2a6fa62 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -369,11 +369,8 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
 	pcibios_setup_bus_devices(bus);
 
 	/* Now scan child busses */
-	list_for_each_entry(dev, &bus->devices, bus_list) {
-		if (pci_is_bridge(dev)) {
-			of_scan_pci_bridge(dev);
-		}
-	}
+	for_each_pci_bridge(dev, bus)
+		of_scan_pci_bridge(dev);
 }
 
 /**
-- cgit
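For context, the for_each_pci_bridge() helper this series relies on (added
by 24a0c654d7d6, per the Requires: tag above) is essentially the following
wrapper over the bus device list; this is the upstream definition from
include/linux/pci.h, reproduced here for reference:

	#define for_each_pci_bridge(dev, bus)				\
		list_for_each_entry(dev, &bus->devices, bus_list)	\
			if (!pci_is_bridge(dev)) {} else

The trailing "{} else" idiom lets the macro bind the following statement to
the bridge-only case while still behaving like a single loop statement.
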
From 9d987eba8ed794b9fcf0d08cf1b312d2ece598de Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Fri, 10 Nov 2017 19:52:30 +0200
Subject: powerpc/pci: Unroll two pass loop when scanning bridges

The current scanning code is hard to understand because it calls the
same function in a loop where the pass value changes without any
comment explaining it:

	for (pass = 0; pass < 2; pass++)
		for_each_pci_bridge(dev, bus)
			max = pci_scan_bridge(bus, dev, max, pass);

An unfamiliar reader cannot easily tell what the purpose of this loop
is without looking at the internals of pci_scan_bridge().

To make this easier to understand, open-code the loop in
pci_scan_child_bus() and pci_hp_add_bridge(), with comments added.

No functional changes intended.

Signed-off-by: Andy Shevchenko
Signed-off-by: Bjorn Helgaas
Reviewed-by: Mika Westerberg
---
 arch/powerpc/kernel/pci-hotplug.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 741f47295188..cf47b1aec4c2 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -104,7 +104,7 @@ EXPORT_SYMBOL_GPL(pci_hp_remove_devices);
  */
 void pci_hp_add_devices(struct pci_bus *bus)
 {
-	int slotno, mode, pass, max;
+	int slotno, mode, max;
 	struct pci_dev *dev;
 	struct pci_controller *phb;
 	struct device_node *dn = pci_bus_to_OF_node(bus);
@@ -133,10 +133,17 @@ void pci_hp_add_devices(struct pci_bus *bus)
 		pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
 		pcibios_setup_bus_devices(bus);
 		max = bus->busn_res.start;
-		for (pass = 0; pass < 2; pass++) {
-			for_each_pci_bridge(dev, bus)
-				max = pci_scan_bridge(bus, dev, max, pass);
-		}
+		/*
+		 * Scan bridges that are already configured. We don't touch
+		 * them unless they are misconfigured (which will be done in
+		 * the second scan below).
+		 */
+		for_each_pci_bridge(dev, bus)
+			max = pci_scan_bridge(bus, dev, max, 0);
+
+		/* Scan bridges that need to be reconfigured */
+		for_each_pci_bridge(dev, bus)
+			max = pci_scan_bridge(bus, dev, max, 1);
 	}
 	pcibios_finish_adding_to_bus(bus);
 }
-- cgit
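Because this listing is limited to arch/powerpc, only the
pci_hp_add_devices() half of the change is visible above; per the commit
message, pci_scan_child_bus() in drivers/pci/probe.c presumably received
the same treatment. A sketch of that unrolled structure (assumed, not part
of the diff shown here):

	/* Sketch of the drivers/pci/probe.c side of this commit */
	for_each_pci_bridge(dev, bus)
		max = pci_scan_bridge(bus, dev, max, 0);	/* pass 0: bridges already configured */

	for_each_pci_bridge(dev, bus)
		max = pci_scan_bridge(bus, dev, max, 1);	/* pass 1: bridges needing (re)configuration */
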
From 961292e664e2ebfb56cf0733d4587711d7bc4c69 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas
Date: Mon, 4 Dec 2017 18:08:01 -0600
Subject: powerpc: Set I/O port resource types correctly

Set I/O port resource structs to have IORESOURCE_IO in their type field.
Previously we marked these as merely IORESOURCE_BUSY without indicating
the type. Setting the type doesn't fix any functional problem, but it
makes %pR output for these resources more useful.

Signed-off-by: Bjorn Helgaas
---
 arch/powerpc/platforms/maple/time.c | 2 +-
 arch/powerpc/sysdev/i8259.c         | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c
index 81799d70a1ee..cfddc87f81bf 100644
--- a/arch/powerpc/platforms/maple/time.c
+++ b/arch/powerpc/platforms/maple/time.c
@@ -134,7 +134,7 @@ int maple_set_rtc_time(struct rtc_time *tm)
 
 static struct resource rtc_iores = {
 	.name = "rtc",
-	.flags = IORESOURCE_BUSY,
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
 };
 
 unsigned long __init maple_get_boot_time(void)
diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c
index bafb014e1a7e..cb9a8b71fd0f 100644
--- a/arch/powerpc/sysdev/i8259.c
+++ b/arch/powerpc/sysdev/i8259.c
@@ -145,21 +145,21 @@ static struct resource pic1_iores = {
 	.name = "8259 (master)",
 	.start = 0x20,
 	.end = 0x21,
-	.flags = IORESOURCE_BUSY,
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
 };
 
 static struct resource pic2_iores = {
 	.name = "8259 (slave)",
 	.start = 0xa0,
 	.end = 0xa1,
-	.flags = IORESOURCE_BUSY,
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
 };
 
 static struct resource pic_edgectrl_iores = {
 	.name = "8259 edge control",
 	.start = 0x4d0,
 	.end = 0x4d1,
-	.flags = IORESOURCE_BUSY,
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
 };
 
 static int i8259_host_match(struct irq_domain *h, struct device_node *node,
-- cgit
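To illustrate the effect on %pR (a sketch with one of the resources above;
the exact output format comes from the resource printing code in
lib/vsprintf.c, so treat the rendered string as approximate):

	static struct resource pic1_iores = {
		.name	= "8259 (master)",
		.start	= 0x20,
		.end	= 0x21,
		.flags	= IORESOURCE_IO | IORESOURCE_BUSY,
	};

	/* With the type bit set, %pR can render a typed port window,
	 * printing something like "[io  0x0020-0x0021]" instead of an
	 * untyped flags dump. */
	pr_info("8259 master: %pR\n", &pic1_iores);
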
From 24e6d5a59ac7d31adc0322de2d0117dfa370936f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 29 Dec 2017 08:53:53 +0100
Subject: mm: pass the vmem_altmap to arch_add_memory and __add_pages

We can just pass this on instead of having to do a radix tree lookup
without proper locking 2 levels into the callchain.

Signed-off-by: Christoph Hellwig
Signed-off-by: Dan Williams
---
 arch/powerpc/mm/mem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 4362b86ef84c..e670cfc2766e 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
 	return -ENODEV;
 }
 
-int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
+int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+		bool want_memblock)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -144,7 +145,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
 		return -EFAULT;
 	}
 
-	return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+	return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-- cgit

From 7b73d978a5d0d2a3637bdd57191cb6ffbad3feca Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 29 Dec 2017 08:53:54 +0100
Subject: mm: pass the vmem_altmap to vmemmap_populate

We can just pass this on instead of having to do a radix tree lookup
without proper locking a few levels into the callchain.

Signed-off-by: Christoph Hellwig
Signed-off-by: Dan Williams
---
 arch/powerpc/mm/init_64.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a07722531b32..779b74a96b8f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
 	vmemmap_list = vmem_back;
 }
 
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+		struct vmem_altmap *altmap)
 {
 	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
 
@@ -193,16 +194,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 	pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
 
 	for (; start < end; start += page_size) {
-		struct vmem_altmap *altmap;
 		void *p;
 		int rc;
 
 		if (vmemmap_populated(start, page_size))
 			continue;
 
-		/* altmap lookups only work at section boundaries */
-		altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
-
 		p = __vmemmap_alloc_block_buf(page_size, node, altmap);
 		if (!p)
 			return -ENOMEM;
-- cgit
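For context, the vmem_altmap these patches thread through the hotplug path
describes a device-supplied reservation from which the struct page array
itself can be allocated. At the time it looked roughly like this
(paraphrased from include/linux/memremap.h; field list reconstructed from
memory, so treat as approximate):

	struct vmem_altmap {
		const unsigned long base_pfn;	/* first pfn of the device range */
		const unsigned long reserve;	/* pages to leave untouched */
		unsigned long free;		/* pages available for the memmap */
		unsigned long align;
		unsigned long alloc;		/* pages already handed out */
	};
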
From da024512a1fa5c979257e442130ee1d468285057 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 29 Dec 2017 08:53:55 +0100
Subject: mm: pass the vmem_altmap to arch_remove_memory and __remove_pages

We can just pass this on instead of having to do a radix tree lookup
without proper locking 2 levels into the callchain.

Signed-off-by: Christoph Hellwig
Signed-off-by: Dan Williams
---
 arch/powerpc/mm/mem.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e670cfc2766e..22aa528b78a2 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -149,11 +149,10 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
 {
 	unsigned long start_pfn = start >> PAGE_SHIFT;
 	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct vmem_altmap *altmap;
 	struct page *page;
 	int ret;
 
@@ -162,11 +161,10 @@ int arch_remove_memory(u64 start, u64 size)
 	 * when querying the zone.
 	 */
 	page = pfn_to_page(start_pfn);
-	altmap = to_vmem_altmap((unsigned long) page);
 	if (altmap)
 		page += vmem_altmap_offset(altmap);
 
-	ret = __remove_pages(page_zone(page), start_pfn, nr_pages);
+	ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
 	if (ret)
 		return ret;
 
-- cgit

From 24b6d4164348370c6b6a58b4248babd85ff9e982 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 29 Dec 2017 08:53:56 +0100
Subject: mm: pass the vmem_altmap to vmemmap_free

We can just pass this on instead of having to do a radix tree lookup
without proper locking a few levels into the callchain.

Signed-off-by: Christoph Hellwig
Signed-off-by: Dan Williams
---
 arch/powerpc/mm/init_64.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 779b74a96b8f..db7d4e092157 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -254,7 +254,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
 	return vmem_back->phys;
 }
 
-void __ref vmemmap_free(unsigned long start, unsigned long end)
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+		struct vmem_altmap *altmap)
 {
 	unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
 	unsigned long page_order = get_order(page_size);
@@ -265,7 +266,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
 
 	for (; start < end; start += page_size) {
 		unsigned long nr_pages, addr;
-		struct vmem_altmap *altmap;
 		struct page *section_base;
 		struct page *page;
 
@@ -285,7 +285,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
 		section_base = pfn_to_page(vmemmap_section_start(start));
 		nr_pages = 1 << page_order;
 
-		altmap = to_vmem_altmap((unsigned long) section_base);
 		if (altmap) {
 			vmem_altmap_free(altmap, nr_pages);
 		} else if (PageReserved(page)) {
-- cgit

From a8fc357b2875da8732c91eb085862a0648d82767 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Fri, 29 Dec 2017 08:53:58 +0100
Subject: mm: split altmap memory map allocation from normal case

No functional changes, just untangling the call chain and documenting
why the altmap is passed around the hotplug code.

Signed-off-by: Christoph Hellwig
Reviewed-by: Logan Gunthorpe
Signed-off-by: Dan Williams
---
 arch/powerpc/mm/init_64.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index db7d4e092157..7a2251d99ed3 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -200,7 +200,10 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		if (vmemmap_populated(start, page_size))
 			continue;
 
-		p = __vmemmap_alloc_block_buf(page_size, node, altmap);
+		if (altmap)
+			p = altmap_alloc_block_buf(page_size, altmap);
+		else
+			p = vmemmap_alloc_block_buf(page_size, node);
 		if (!p)
 			return -ENOMEM;
 
-- cgit
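Taken together, this series replaces the unlocked to_vmem_altmap()
radix-tree lookups with explicit parameter passing. The resulting flow,
sketched from the diffs above (simplified; not literal code):

	/*
	 * Add path:
	 *   arch_add_memory(nid, start, size, altmap, want_memblock)
	 *     __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock)
	 *       vmemmap_populate(start, end, node, altmap)
	 *         p = altmap ? altmap_alloc_block_buf(page_size, altmap)
	 *                    : vmemmap_alloc_block_buf(page_size, node);
	 *
	 * Remove path:
	 *   arch_remove_memory(start, size, altmap)
	 *     __remove_pages(page_zone(page), start_pfn, nr_pages, altmap)
	 *       vmemmap_free(start, end, altmap)
	 */
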
From fdabc3fe998203038a78763c1b3d6ace517e0eea Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas
Date: Fri, 15 Dec 2017 15:31:30 -0600
Subject: PCI: Add #defines for Completion Timeout Disable feature

Add #defines for the Completion Timeout Disable feature and use them.
No functional change intended.

Signed-off-by: Bjorn Helgaas
Acked-by: Michael Ellerman
---
 arch/powerpc/platforms/powernv/eeh-powernv.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 4650fb294e7a..2f7cd0ef3cdc 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -1654,14 +1654,14 @@ static int pnv_eeh_restore_vf_config(struct pci_dn *pdn)
 		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
 				      2, devctl);
 
-		/* Disable Completion Timeout */
+		/* Disable Completion Timeout if possible */
 		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
 				     4, &cap2);
-		if (cap2 & 0x10) {
+		if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) {
 			eeh_ops->read_config(pdn,
 					     edev->pcie_cap + PCI_EXP_DEVCTL2,
 					     4, &cap2);
-			cap2 |= 0x10;
+			cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS;
 			eeh_ops->write_config(pdn,
 					      edev->pcie_cap + PCI_EXP_DEVCTL2,
 					      4, cap2);
-- cgit
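Both constants name bit 4 of their respective registers, matching the
literal 0x10 values they replace; per include/uapi/linux/pci_regs.h they
read approximately:

	#define PCI_EXP_DEVCAP2_COMP_TMOUT_DIS	0x00000010 /* Completion Timeout Disable supported */
	#define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS	0x0010	   /* Completion Timeout Disable */
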
From 50ed5780d50baa8a51c3d0e8731c09bb0bc1b4b5 Mon Sep 17 00:00:00 2001
From: Sinan Kaya
Date: Tue, 19 Dec 2017 00:37:38 -0500
Subject: powerpc/PCI: Deprecate pci_get_bus_and_slot()

pci_get_bus_and_slot() is restrictive in that it assumes domain 0 is
where the PCI device is present. This prevents the drivers from being
reused for other domain numbers.

Getting ready to remove the pci_get_bus_and_slot() function in favor
of pci_get_domain_bus_and_slot(). Use pci_get_domain_bus_and_slot()
with a domain number of 0, as this code is not ready to consume
multiple domains and the existing code used domain number 0.

Signed-off-by: Sinan Kaya
Signed-off-by: Bjorn Helgaas
Acked-by: Michael Ellerman
---
 arch/powerpc/kernel/pci_32.c              | 3 ++-
 arch/powerpc/platforms/powermac/feature.c | 2 +-
 arch/powerpc/sysdev/mv64x60_pci.c         | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 1d817f4d97d9..85ad2f78b889 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -96,7 +96,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
 		reg = of_get_property(node, "reg", NULL);
 		if (!reg)
 			continue;
-		dev = pci_get_bus_and_slot(pci_bus, ((reg[0] >> 8) & 0xff));
+		dev = pci_get_domain_bus_and_slot(0, pci_bus,
+						  ((reg[0] >> 8) & 0xff));
 		if (!dev || !dev->subordinate) {
 			pci_dev_put(dev);
 			continue;
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 9e3f39d36e88..ed8b16694153 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -829,7 +829,7 @@ core99_ata100_enable(struct device_node *node, long value)
 
 	if (value) {
 		if (pci_device_from_OF_node(node, &pbus, &pid) == 0)
-			pdev = pci_get_bus_and_slot(pbus, pid);
+			pdev = pci_get_domain_bus_and_slot(0, pbus, pid);
 		if (pdev == NULL)
 			return 0;
 		rc = pci_enable_device(pdev);
diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c
index d52b3b81e05f..6fe9104632a0 100644
--- a/arch/powerpc/sysdev/mv64x60_pci.c
+++ b/arch/powerpc/sysdev/mv64x60_pci.c
@@ -37,7 +37,7 @@ static ssize_t mv64x60_hs_reg_read(struct file *filp, struct kobject *kobj,
 	if (count < MV64X60_VAL_LEN_MAX)
 		return -EINVAL;
 
-	phb = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
+	phb = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
 	if (!phb)
 		return -ENODEV;
 	pci_read_config_dword(phb, MV64X60_PCICFG_CPCI_HOTSWAP, &v);
@@ -61,7 +61,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj,
 	if (sscanf(buf, "%i", &v) != 1)
 		return -EINVAL;
 
-	phb = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
+	phb = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
 	if (!phb)
 		return -ENODEV;
 	pci_write_config_dword(phb, MV64X60_PCICFG_CPCI_HOTSWAP, v);
-- cgit

From 59f47eff03a08cd2d91310f1c15a5343fa0071e5 Mon Sep 17 00:00:00 2001
From: Rob Herring
Date: Thu, 4 Jan 2018 15:12:14 -0600
Subject: powerpc/pci: Use of_irq_parse_and_map_pci() helper

Instead of calling both of_irq_parse_pci() and irq_create_of_mapping(),
call of_irq_parse_and_map_pci(), which does the same thing. This will
allow making of_irq_parse_pci() a private, static function. This
changes the logic slightly in that the fallback path will also be
taken if irq_create_of_mapping() fails internally.

Signed-off-by: Rob Herring
[bhelgaas: fold in virq init from Stephen Rothwell]
Signed-off-by: Bjorn Helgaas
Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: Michael Ellerman
---
 arch/powerpc/kernel/pci-common.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index 0ac7aa346c69..344af823c3c4 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -339,8 +339,7 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
  */
 static int pci_read_irq_line(struct pci_dev *pci_dev)
 {
-	struct of_phandle_args oirq;
-	unsigned int virq;
+	unsigned int virq = 0;
 
 	pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
 
@@ -348,7 +347,7 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
 	memset(&oirq, 0xff, sizeof(oirq));
 #endif
 	/* Try to get a mapping from the device-tree */
-	if (of_irq_parse_pci(pci_dev, &oirq)) {
+	if (!of_irq_parse_and_map_pci(pci_dev, 0, 0)) {
 		u8 line, pin;
 
 		/* If that fails, lets fallback to what is in the config
@@ -372,11 +371,6 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
 		virq = irq_create_mapping(NULL, line);
 		if (virq)
 			irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
-	} else {
-		pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %pOF\n",
-			 oirq.args_count, oirq.args[0], oirq.args[1], oirq.np);
-
-		virq = irq_create_of_mapping(&oirq);
 	}
 
 	if (!virq) {
-- cgit
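For reference, the combined helper used above is roughly equivalent to the
two calls it replaces; a sketch based on the upstream implementation in
drivers/pci/of.c at the time (the slot/pin arguments are unused on this
path, and a return of 0 means "no IRQ", which is what triggers the
fallback described in the commit message):

	static int of_irq_parse_and_map_pci(const struct pci_dev *dev,
					    u8 slot, u8 pin)
	{
		struct of_phandle_args oirq;
		int ret;

		ret = of_irq_parse_pci(dev, &oirq);
		if (ret)
			return 0;	/* no mapping found */

		return irq_create_of_mapping(&oirq);
	}
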
From 785a3fab4adbf91b2189c928a59ae219c54ba95e Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Mon, 23 Oct 2017 07:20:00 -0700
Subject: mm, dax: introduce pfn_t_special()

In support of removing the VM_MIXEDMAP indication from DAX VMAs,
introduce pfn_t_special() for drivers to indicate that _PAGE_SPECIAL
should be used for DAX ptes. This also helps identify drivers like
dcssblk that only want to use DAX in a read-only fashion without
get_user_pages() support.

Ideally we could delete axonram and dcssblk DAX support, but if we
need to keep it, better to make it explicit that axonram and dcssblk
only support a subset of DAX due to missing _PAGE_DEVMAP support.

Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: Michael Ellerman
Cc: Martin Schwidefsky
Cc: Heiko Carstens
Signed-off-by: Dan Williams
---
 arch/powerpc/sysdev/axonram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 1b307c80b401..cdbb0e59b3d3 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -151,7 +151,7 @@ __axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_page
 	resource_size_t offset = pgoff * PAGE_SIZE;
 
 	*kaddr = (void *) bank->io_addr + offset;
-	*pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
+	*pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV|PFN_SPECIAL);
 	return (bank->size - offset) / PAGE_SIZE;
 }
 
-- cgit
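A plausible shape of the new helper (a sketch; the actual definition in
include/linux/pfn_t.h also provides a fallback for configurations without
pte_special support):

	static inline bool pfn_t_special(pfn_t pfn)
	{
		return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
	}
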
From 569d0365f571fa6421a5c80bc30d1b2cdab857fe Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Sat, 14 Oct 2017 11:33:32 -0700
Subject: dax: require 'struct page' by default for filesystem dax

If a dax buffer from a device that does not map pages is passed to
read(2) or write(2) as a target for direct-I/O, it triggers SIGBUS. If
gdb attempts to examine the contents of a dax buffer from a device
that does not map pages, it triggers SIGBUS. If fork(2) is called on a
process with a dax mapping from a device that does not map pages, it
triggers SIGBUS. 'struct page' is required; otherwise several kernel
code paths break in surprising ways.

Disable filesystem-dax on devices that do not map pages. In addition
to needing pfn_to_page() to be valid, we also require devmap pages. We
need this to detect dax pages in the get_user_pages_fast() path and so
that we can stop managing the VM_MIXEDMAP flag. For DAX drivers that
have not supported get_user_pages() to date, we allow them to opt in
to supporting DAX with the CONFIG_FS_DAX_LIMITED configuration option,
which requires ->direct_access() to return pfn_t_special() pfns. This
leaves DAX support in brd disabled and scheduled for removal.

Note that when the initial dax support was being merged a few years
back there was concern that struct page was unsuitable for use with
next generation persistent memory devices. The theoretical concern was
that struct page access, being such a hotly used data structure in the
kernel, would lead to media wear out. While that was a reasonable
conservative starting position, it has not held true in practice. We
have long since committed to using devm_memremap_pages() to support
higher order kernel functionality that needs get_user_pages() and
pfn_to_page().

Cc: Jeff Moyer
Cc: Ross Zwisler
Cc: Benjamin Herrenschmidt
Cc: Paul Mackerras
Cc: Michael Ellerman
Cc: Martin Schwidefsky
Cc: Heiko Carstens
Reviewed-by: Jan Kara
Reviewed-by: Christoph Hellwig
Reviewed-by: Gerald Schaefer
Signed-off-by: Dan Williams
---
 arch/powerpc/platforms/Kconfig | 1 +
 1 file changed, 1 insertion(+)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 5a96a2763e4a..2ce89b42a9f4 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -297,6 +297,7 @@ config AXON_RAM
 	tristate "Axon DDR2 memory device driver"
 	depends on PPC_IBM_CELL_BLADE && BLOCK
 	select DAX
+	select FS_DAX_LIMITED
 	default m
 	help
 	  It registers one block device per Axon's DDR2 memory bank found
-- cgit
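A sketch of the policy this commit establishes in the dax core (names and
placement simplified; the real check lives in the dax validation path, so
treat this as illustrative only):

	/* Sketch only, not the literal upstream code */
	if (pfn_t_devmap(pfn))
		return 0;	/* full dax: struct page + get_user_pages() */
	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
		return 0;	/* limited dax: no gup, no fork, no direct-I/O target */
	return -EOPNOTSUPP;	/* otherwise filesystem-dax is disabled */
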
From 3ccfebedd8cf54e291c809c838d8ad5cc00f5688 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Jan 2018 15:20:11 -0500
Subject: powerpc, membarrier: Skip memory barrier in switch_mm()

Allow PowerPC to skip the full memory barrier in switch_mm(), and only
issue the barrier when scheduling into a task belonging to a process
that has registered to use expedited private membarrier.

Threads targeting the same VM but which belong to different thread
groups are a tricky case. It has a few consequences:

It turns out that we cannot rely on get_nr_threads(p) to count the
number of threads using a VM. We can use
(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) instead to
skip the synchronize_sched() for cases where the VM only has a single
user, and that user only has a single thread.

It also turns out that we cannot use for_each_thread() to set thread
flags in all threads using a VM, as it only iterates on the thread
group.

Therefore, test the membarrier state variable directly rather than
relying on thread flags. This means
membarrier_register_private_expedited() needs to set the
MEMBARRIER_STATE_PRIVATE_EXPEDITED flag, issue synchronize_sched(),
and only then set MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY, which
allows private expedited membarrier commands to succeed.
membarrier_arch_switch_mm() now tests for the
MEMBARRIER_STATE_PRIVATE_EXPEDITED flag.

Signed-off-by: Mathieu Desnoyers
Acked-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Cc: Alan Stern
Cc: Alexander Viro
Cc: Andrea Parri
Cc: Andrew Hunter
Cc: Andy Lutomirski
Cc: Avi Kivity
Cc: Benjamin Herrenschmidt
Cc: Boqun Feng
Cc: Dave Watson
Cc: David Sehr
Cc: Greg Hackmann
Cc: Linus Torvalds
Cc: Maged Michael
Cc: Michael Ellerman
Cc: Nicholas Piggin
Cc: Paul E. McKenney
Cc: Paul Mackerras
Cc: Russell King
Cc: Will Deacon
Cc: linux-api@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/20180129202020.8515-3-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar
---
 arch/powerpc/Kconfig                  |  1 +
 arch/powerpc/include/asm/membarrier.h | 26 ++++++++++++++++++++++++++
 arch/powerpc/mm/mmu_context.c         |  7 +++++++
 3 files changed, 34 insertions(+)
 create mode 100644 arch/powerpc/include/asm/membarrier.h
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2ed525a44734..a2380de50878 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -140,6 +140,7 @@ config PPC
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_PMEM_API		if PPC64
+	select ARCH_HAS_MEMBARRIER_CALLBACKS
 	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h
new file mode 100644
index 000000000000..98ff4f1fcf2b
--- /dev/null
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -0,0 +1,26 @@
+#ifndef _ASM_POWERPC_MEMBARRIER_H
+#define _ASM_POWERPC_MEMBARRIER_H
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+					     struct mm_struct *next,
+					     struct task_struct *tsk)
+{
+	/*
+	 * Only need the full barrier when switching between processes.
+	 * Barrier when switching from kernel to userspace is not
+	 * required here, given that it is implied by mmdrop(). Barrier
+	 * when switching from userspace to kernel is not needed after
+	 * store to rq->curr.
+	 */
+	if (likely(!(atomic_read(&next->membarrier_state) &
+		     MEMBARRIER_STATE_PRIVATE_EXPEDITED) || !prev))
+		return;
+
+	/*
+	 * The membarrier system call requires a full memory barrier
+	 * after storing to rq->curr, before going back to user-space.
+	 */
+	smp_mb();
+}
+
+#endif /* _ASM_POWERPC_MEMBARRIER_H */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index d60a62bf4fc7..0ab297c4cfad 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -12,6 +12,7 @@
 
 #include <linux/mm.h>
 #include <linux/cpu.h>
+#include <linux/sched/mm.h>
 
 #include <asm/mmu_context.h>
 
@@ -58,6 +59,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 *
 	 * On the read side the barrier is in pte_xchg(), which orders
	 * the store to the PTE vs the load of mm_cpumask.
+	 *
+	 * This full barrier is needed by membarrier when switching
+	 * between processes after store to rq->curr, before user-space
+	 * memory accesses.
 	 */
 	smp_mb();
 
@@ -80,6 +85,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
 	if (new_on_cpu)
 		radix_kvm_prefetch_workaround(next);
+	else
+		membarrier_arch_switch_mm(prev, next, tsk);
 
 	/*
 	 * The actual HW switching method differs between the various
-- cgit
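From user space, the private expedited machinery this optimizes is driven
via the membarrier(2) system call; a minimal usage sketch (error handling
omitted):

	#include <linux/membarrier.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Register once per process; switch_mm() then pays the extra
	 * barrier only for registered processes. */
	syscall(__NR_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0);

	/* Later: order memory accesses against all other threads of this
	 * process, as if each had executed a full barrier. */
	syscall(__NR_membarrier, MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0);
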
From c5f58bd58f432be5d92df33c5458e0bcbee3aadf Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers
Date: Mon, 29 Jan 2018 15:20:13 -0500
Subject: membarrier: Provide GLOBAL_EXPEDITED command

Allow expedited membarrier to be used for data shared between
processes through shared memory.

Processes wishing to receive the membarriers register with
MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED. Those which want to issue a
membarrier invoke MEMBARRIER_CMD_GLOBAL_EXPEDITED.

This allows an extremely simple kernel-level implementation: we have
almost everything we need with the PRIVATE_EXPEDITED barrier code. All
we need to do is add a flag in the mm_struct that will be used to
check whether we need to send the IPI to the current thread of each
CPU.

There is a slight downside to this approach compared to targeting
specific shared memory users: when performing a membarrier operation,
all registered "global" receivers will get the barrier, even if they
don't share a memory mapping with the sender issuing
MEMBARRIER_CMD_GLOBAL_EXPEDITED.

This registration approach seems to fit the requirement of not
disturbing processes that really deeply care about real-time: they
simply should not register with
MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED.

In order to align the membarrier command names, the
"MEMBARRIER_CMD_SHARED" command is renamed to "MEMBARRIER_CMD_GLOBAL",
keeping an alias of MEMBARRIER_CMD_SHARED to MEMBARRIER_CMD_GLOBAL for
UAPI header backward compatibility.

Signed-off-by: Mathieu Desnoyers
Acked-by: Thomas Gleixner
Acked-by: Peter Zijlstra (Intel)
Cc: Andrea Parri
Cc: Andrew Hunter
Cc: Andy Lutomirski
Cc: Avi Kivity
Cc: Benjamin Herrenschmidt
Cc: Boqun Feng
Cc: Dave Watson
Cc: David Sehr
Cc: Greg Hackmann
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Maged Michael
Cc: Michael Ellerman
Cc: Paul E. McKenney
Cc: Paul Mackerras
Cc: Russell King
Cc: Will Deacon
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/20180129202020.8515-5-mathieu.desnoyers@efficios.com
Signed-off-by: Ingo Molnar
---
 arch/powerpc/include/asm/membarrier.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h
index 98ff4f1fcf2b..6e20bb5c74ea 100644
--- a/arch/powerpc/include/asm/membarrier.h
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -13,7 +13,8 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
 	 * store to rq->curr.
 	 */
 	if (likely(!(atomic_read(&next->membarrier_state) &
-		     MEMBARRIER_STATE_PRIVATE_EXPEDITED) || !prev))
+		     (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
+		      MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
 		return;
 
 	/*
-- cgit
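The new global pair is used the same way from user space (a sketch; the
command constants are the ones named in the commit message above):

	/* Any process that wants to observe global expedited barriers
	 * registers once: */
	syscall(__NR_membarrier, MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, 0);

	/* Any process may then issue a barrier covering all registered
	 * processes, regardless of shared mappings: */
	syscall(__NR_membarrier, MEMBARRIER_CMD_GLOBAL_EXPEDITED, 0);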