From 6c434d6176c0cb42847c33245189667d645db7bf Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 11 May 2015 10:15:49 +0200 Subject: x86/mm: Do not flush last cacheline twice in clflush_cache_range() The current algorithm used in clflush_cache_range() can cause the last cache line of the buffer to be flushed twice. Fix that algorithm so that each cache line will only be flushed once. Reported-by: H. Peter Anvin Signed-off-by: Ross Zwisler Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/1430259192-18802-1-git-send-email-ross.zwisler@linux.intel.com Link: http://lkml.kernel.org/r/1431332153-18566-5-git-send-email-bp@alien8.de [ Changed it to 'void *' to simplify the type conversions. ] Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 89af288ec674..5ddd9005f6c3 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -129,16 +129,15 @@ within(unsigned long addr, unsigned long start, unsigned long end) */ void clflush_cache_range(void *vaddr, unsigned int size) { - void *vend = vaddr + size - 1; + unsigned long clflush_mask = boot_cpu_data.x86_clflush_size - 1; + void *vend = vaddr + size; + void *p; mb(); - for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) - clflushopt(vaddr); - /* - * Flush any possible final partial cacheline: - */ - clflushopt(vend); + for (p = (void *)((unsigned long)vaddr & ~clflush_mask); + p < vend; p += boot_cpu_data.x86_clflush_size) + clflushopt(p); mb(); } -- cgit From e4b6be33c28923d8cde53023e0888b1c5d1a9027 Mon Sep 17 00:00:00 2001 From: "Luis R. 
Rodriguez" Date: Mon, 11 May 2015 10:15:53 +0200 Subject: x86/mm: Add ioremap_uc() helper to map memory uncacheable (not UC-) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ioremap_nocache() currently uses UC- by default. Our goal is to eventually make UC the default. Linux maps UC- to PCD=1, PWT=0 page attributes on non-PAT systems. Linux maps UC to PCD=1, PWT=1 page attributes on non-PAT systems. On non-PAT and PAT systems a WC MTRR has different effects on pages with either of these attributes. In order to help with a smooth transition it's best to enable use of UC (PCD=1, PWT=1) on a region as that ensures a WC MTRR will have no effect on a region; this, however, requires us to have a way to declare a region as UC and we currently do not have a way to do this. WC MTRR on non-PAT system with PCD=1, PWT=0 (UC-) yields WC. WC MTRR on non-PAT system with PCD=1, PWT=1 (UC) yields UC. WC MTRR on PAT system with PCD=1, PWT=0 (UC-) yields WC. WC MTRR on PAT system with PCD=1, PWT=1 (UC) yields UC. A flip of the default ioremap_nocache() behaviour from UC- to UC can therefore regress a memory region from effective memory type WC to UC if MTRRs are used. Use of MTRRs should be phased out and in the best case only arch_phys_wc_add() use will remain; even if this happens, arch_phys_wc_add() will have an effect on non-PAT systems and changes to default ioremap_nocache() behaviour could regress drivers. Now, ideally we'd use ioremap_nocache() on the regions in which we'd need uncacheable memory types and avoid any MTRRs on those regions. There are however some restrictions on MTRRs use, such as the requirement of having the base and size of variable sized MTRRs to be powers of two, which could mean having to use a WC MTRR over a large area which includes a region in which write-combining effects are undesirable. 
Add ioremap_uc() to help with both the phasing out of MTRR use and also to provide a way to blacklist small WC undesirable regions in devices with mixed regions which are size-implicated to use large WC MTRRs. Use of ioremap_uc() helps phase out MTRR use by avoiding regressions with an eventual flip of default behaviour of ioremap_nocache() from UC- to UC. Drivers working with WC MTRRs can use the below table to review and consider the use of ioremap*() and similar helpers to ensure appropriate behaviour long term even if default ioremap_nocache() behaviour changes from UC- to UC. Although ioremap_uc() is being added we leave set_memory_uc() to use UC- as only initial memory type setup is required to be able to accommodate existing device drivers and phase out MTRR use. It should also be clarified that set_memory_uc() cannot be used with IO memory: even though its use will not return any errors, it really has no effect. ---------------------------------------------------------------------- MTRR Non-PAT PAT Linux ioremap value Effective memory type ---------------------------------------------------------------------- Non-PAT | PAT PAT |PCD ||PWT ||| WC 000 WB _PAGE_CACHE_MODE_WB WC | WC WC 001 WC _PAGE_CACHE_MODE_WC WC* | WC WC 010 UC- _PAGE_CACHE_MODE_UC_MINUS WC* | WC WC 011 UC _PAGE_CACHE_MODE_UC UC | UC ---------------------------------------------------------------------- Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Acked-by: H. 
Peter Anvin Cc: Andy Lutomirski Cc: Antonino Daplas Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Daniel Vetter Cc: Dave Airlie Cc: Davidlohr Bueso Cc: Denys Vlasenko Cc: Jean-Christophe Plagniol-Villard Cc: Juergen Gross Cc: Linus Torvalds Cc: Mel Gorman Cc: Mike Travis Cc: Peter Zijlstra Cc: Suresh Siddha Cc: Thierry Reding Cc: Thomas Gleixner Cc: Tomi Valkeinen Cc: Toshi Kani Cc: Ville Syrjälä Cc: Vlastimil Babka Cc: Will Deacon Cc: linux-fbdev@vger.kernel.org Link: http://lkml.kernel.org/r/1430343851-967-2-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1431332153-18566-9-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 5ddd9005f6c3..c77abd7f92a2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1467,6 +1467,9 @@ int _set_memory_uc(unsigned long addr, int numpages) { /* * for now UC MINUS. see comments in ioremap_nocache() + * If you really need strong UC use ioremap_uc(), but note + * that you cannot override IO areas with set_memory_*() as + * these helpers cannot work with IO memory. */ return change_page_attr_set(&addr, numpages, cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), -- cgit From 1fcb61c52bbdbbc46d132acf7dab9ad0eca433fe Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 23 Apr 2015 01:07:08 -0700 Subject: x86/mm/pageattr: Remove an unused variable in slow_virt_to_phys() The patch doesn't change any logic. Signed-off-by: Dexuan Cui Cc: Borislav Petkov Cc: H. 
Peter Anvin Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1429776428-4475-1-git-send-email-decui@microsoft.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index c77abd7f92a2..397838eb292b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -417,13 +417,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr) phys_addr_t phys_addr; unsigned long offset; enum pg_level level; - unsigned long psize; unsigned long pmask; pte_t *pte; pte = lookup_address(virt_addr, &level); BUG_ON(!pte); - psize = page_level_size(level); pmask = page_level_mask(level); offset = virt_addr & ~pmask; phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT; -- cgit From cb32edf65bf2197a2d2226e94c7602dc92e295bb Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 26 May 2015 10:28:15 +0200 Subject: x86/mm/pat: Wrap pat_enabled into a function API We use pat_enabled in x86-specific code to see if PAT is enabled or not but we're granting full access to it even though readers do not need to set it. If, for instance, we granted access to it to modules later they then could override the variable setting... no bueno. This renames pat_enabled to a new static variable __pat_enabled. Folks are redirected to use pat_enabled() now. Code that sets this can only be internal to pat.c. Apart from the early kernel parameter "nopat" to disable PAT, we also have a few cases that disable it later and make use of a helper pat_disable(). It is wrapped under an ifdef but since that code cannot run unless PAT was enabled it's not required to wrap it with ifdefs, so unwrap that. Likewise, since "nopat" doesn't really change non-PAT systems just remove that ifdef as well. 
Although we could add and use an early_param_off(), these helpers don't use __read_mostly but we want to keep __read_mostly for __pat_enabled as this is a hot path -- upon boot, for instance, a simple guest may see ~4k accesses to pat_enabled(). Since __read_mostly early boot params are not that common we don't add a helper for them just yet. Signed-off-by: Luis R. Rodriguez Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: Andy Walls Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Brian Gerst Cc: Christoph Lameter Cc: Daniel Vetter Cc: Dave Airlie Cc: Denys Vlasenko Cc: Doug Ledford Cc: H. Peter Anvin Cc: Juergen Gross Cc: Kyle McMartin Cc: Linus Torvalds Cc: Michael S. Tsirkin Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1430425520-22275-3-git-send-email-mcgrof@do-not-panic.com Link: http://lkml.kernel.org/r/1432628901-18044-13-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 397838eb292b..70d221fe2eb4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1571,7 +1571,7 @@ int set_memory_wc(unsigned long addr, int numpages) { int ret; - if (!pat_enabled) + if (!pat_enabled()) return set_memory_uc(addr, numpages); ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, -- cgit From d6472302f242559d45dcf4ebace62508dc4d8aeb Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 2 Jun 2015 19:01:38 +1000 Subject: x86/mm: Decouple <linux/vmalloc.h> from <asm/io.h> Nothing in <asm/io.h> uses anything from <linux/vmalloc.h>, so remove it from there and fix up the resulting build problems triggered on x86 {64|32}-bit {def|allmod|allno}configs. The breakages were triggering in places where x86 builds relied on vmalloc() facilities but did not include <linux/vmalloc.h> explicitly and relied on the implicit inclusion via <asm/io.h>. Also add: - <linux/init.h> to <linux/io.h> - <asm/pgtable_types.h> to <asm/io.h> ... which were two other implicit header file dependencies. 
Suggested-by: David Miller Signed-off-by: Stephen Rothwell [ Tidied up the changelog. ] Acked-by: David Miller Acked-by: Takashi Iwai Acked-by: Viresh Kumar Acked-by: Vinod Koul Cc: Andrew Morton Cc: Anton Vorontsov Cc: Boris Ostrovsky Cc: Colin Cross Cc: David Vrabel Cc: H. Peter Anvin Cc: Haiyang Zhang Cc: James E.J. Bottomley Cc: Jaroslav Kysela Cc: K. Y. Srinivasan Cc: Kees Cook Cc: Konrad Rzeszutek Wilk Cc: Kristen Carlson Accardi Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Suma Ramars Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 70d221fe2eb4..fae3c5366ac0 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include -- cgit From 7202fdb1b3299ec78dc1e7702260947ec20dd9e9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 4 Jun 2015 18:55:11 +0200 Subject: x86/mm/pat: Remove pat_enabled() checks Now that we emulate a PAT table when PAT is disabled, there's no need for those checks anymore as the PAT abstraction will handle those cases too. Based on a conglomerate patch from Toshi Kani. Signed-off-by: Borislav Petkov Reviewed-by: Toshi Kani Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-4-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index fae3c5366ac0..31b4f3fd1207 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1572,9 +1572,6 @@ int set_memory_wc(unsigned long addr, int numpages) { int ret; - if (!pat_enabled()) - return set_memory_uc(addr, numpages); - ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_WC, NULL); if (ret) -- cgit From 623dffb2a2e059e1ace45b59b3ff21c66c419614 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 4 Jun 2015 18:55:20 +0200 Subject: x86/mm/pat: Add set_memory_wt() for Write-Through type Now that reserve_ram_pages_type() accepts the WT type, add set_memory_wt(), set_memory_array_wt() and set_pages_array_wt() in order to be able to set memory to Write-Through page cache mode. Also, extend ioremap_change_attr() to accept the WT type. Signed-off-by: Toshi Kani Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Andy Lutomirski Cc: Elliott@hp.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Luis R. 
Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: hch@lst.de Cc: hmh@hmh.eng.br Cc: jgross@suse.com Cc: konrad.wilk@oracle.com Cc: linux-mm Cc: linux-nvdimm@lists.01.org Cc: stefan.bader@canonical.com Cc: yigal@plexistor.com Link: http://lkml.kernel.org/r/1433436928-31903-13-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 62 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 13 deletions(-) (limited to 'arch/x86/mm/pageattr.c') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 31b4f3fd1207..727158cb3b3c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1503,12 +1503,10 @@ EXPORT_SYMBOL(set_memory_uc); static int _set_memory_array(unsigned long *addr, int addrinarray, enum page_cache_mode new_type) { + enum page_cache_mode set_type; int i, j; int ret; - /* - * for now UC MINUS. see comments in ioremap_nocache() - */ for (i = 0; i < addrinarray; i++) { ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, new_type, NULL); @@ -1516,9 +1514,12 @@ static int _set_memory_array(unsigned long *addr, int addrinarray, goto out_free; } + /* If WC, set to UC- first and then WC */ + set_type = (new_type == _PAGE_CACHE_MODE_WC) ? 
+ _PAGE_CACHE_MODE_UC_MINUS : new_type; + ret = change_page_attr_set(addr, addrinarray, - cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS), - 1); + cachemode2pgprot(set_type), 1); if (!ret && new_type == _PAGE_CACHE_MODE_WC) ret = change_page_attr_set_clr(addr, addrinarray, @@ -1550,6 +1551,12 @@ int set_memory_array_wc(unsigned long *addr, int addrinarray) } EXPORT_SYMBOL(set_memory_array_wc); +int set_memory_array_wt(unsigned long *addr, int addrinarray) +{ + return _set_memory_array(addr, addrinarray, _PAGE_CACHE_MODE_WT); +} +EXPORT_SYMBOL_GPL(set_memory_array_wt); + int _set_memory_wc(unsigned long addr, int numpages) { int ret; @@ -1575,21 +1582,39 @@ int set_memory_wc(unsigned long addr, int numpages) ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_WC, NULL); if (ret) - goto out_err; + return ret; ret = _set_memory_wc(addr, numpages); if (ret) - goto out_free; + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); - return 0; - -out_free: - free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); -out_err: return ret; } EXPORT_SYMBOL(set_memory_wc); +int _set_memory_wt(unsigned long addr, int numpages) +{ + return change_page_attr_set(&addr, numpages, + cachemode2pgprot(_PAGE_CACHE_MODE_WT), 0); +} + +int set_memory_wt(unsigned long addr, int numpages) +{ + int ret; + + ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, + _PAGE_CACHE_MODE_WT, NULL); + if (ret) + return ret; + + ret = _set_memory_wt(addr, numpages); + if (ret) + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); + + return ret; +} +EXPORT_SYMBOL_GPL(set_memory_wt); + int _set_memory_wb(unsigned long addr, int numpages) { /* WB cache mode is hard wired to all cache attribute bits being 0 */ @@ -1680,6 +1705,7 @@ static int _set_pages_array(struct page **pages, int addrinarray, { unsigned long start; unsigned long end; + enum page_cache_mode set_type; int i; int free_idx; int ret; @@ -1693,8 +1719,12 @@ static int 
_set_pages_array(struct page **pages, int addrinarray, goto err_out; } + /* If WC, set to UC- first and then WC */ + set_type = (new_type == _PAGE_CACHE_MODE_WC) ? + _PAGE_CACHE_MODE_UC_MINUS : new_type; + ret = cpa_set_pages_array(pages, addrinarray, - cachemode2pgprot(_PAGE_CACHE_MODE_UC_MINUS)); + cachemode2pgprot(set_type)); if (!ret && new_type == _PAGE_CACHE_MODE_WC) ret = change_page_attr_set_clr(NULL, addrinarray, cachemode2pgprot( @@ -1728,6 +1758,12 @@ int set_pages_array_wc(struct page **pages, int addrinarray) } EXPORT_SYMBOL(set_pages_array_wc); +int set_pages_array_wt(struct page **pages, int addrinarray) +{ + return _set_pages_array(pages, addrinarray, _PAGE_CACHE_MODE_WT); +} +EXPORT_SYMBOL_GPL(set_pages_array_wt); + int set_pages_wb(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); -- cgit