summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/filemap.c34
-rw-r--r--mm/hmm.c19
-rw-r--r--mm/kmsan/hooks.c13
-rw-r--r--mm/memblock.c64
-rw-r--r--mm/mm_init.c197
5 files changed, 108 insertions, 219 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index a52dd38d2b4a..13f0259d993c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1621,7 +1621,7 @@ static void filemap_end_dropbehind(struct folio *folio)
* completes. Do that now. If we fail, it's likely because of a big folio -
* just reset dropbehind for that case and latter completions should invalidate.
*/
-static void filemap_end_dropbehind_write(struct folio *folio)
+void folio_end_dropbehind(struct folio *folio)
{
if (!folio_test_dropbehind(folio))
return;
@@ -1638,16 +1638,18 @@ static void filemap_end_dropbehind_write(struct folio *folio)
folio_unlock(folio);
}
}
+EXPORT_SYMBOL_GPL(folio_end_dropbehind);
/**
- * folio_end_writeback - End writeback against a folio.
+ * folio_end_writeback_no_dropbehind - End writeback against a folio.
* @folio: The folio.
*
* The folio must actually be under writeback.
+ * This call is intended for filesystems that need to defer dropbehind.
*
* Context: May be called from process or interrupt context.
*/
-void folio_end_writeback(struct folio *folio)
+void folio_end_writeback_no_dropbehind(struct folio *folio)
{
VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio);
@@ -1663,6 +1665,25 @@ void folio_end_writeback(struct folio *folio)
folio_rotate_reclaimable(folio);
}
+ if (__folio_end_writeback(folio))
+ folio_wake_bit(folio, PG_writeback);
+
+ acct_reclaim_writeback(folio);
+}
+EXPORT_SYMBOL_GPL(folio_end_writeback_no_dropbehind);
+
+/**
+ * folio_end_writeback - End writeback against a folio.
+ * @folio: The folio.
+ *
+ * The folio must actually be under writeback.
+ *
+ * Context: May be called from process or interrupt context.
+ */
+void folio_end_writeback(struct folio *folio)
+{
+ VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio);
+
/*
* Writeback does not hold a folio reference of its own, relying
* on truncation to wait for the clearing of PG_writeback.
@@ -1670,11 +1691,8 @@ void folio_end_writeback(struct folio *folio)
* reused before the folio_wake_bit().
*/
folio_get(folio);
- if (__folio_end_writeback(folio))
- folio_wake_bit(folio, PG_writeback);
-
- filemap_end_dropbehind_write(folio);
- acct_reclaim_writeback(folio);
+ folio_end_writeback_no_dropbehind(folio);
+ folio_end_dropbehind(folio);
folio_put(folio);
}
EXPORT_SYMBOL(folio_end_writeback);
diff --git a/mm/hmm.c b/mm/hmm.c
index 3e00f08722d5..87562914670a 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -806,7 +806,7 @@ dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map,
case PCI_P2PDMA_MAP_NONE:
break;
case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
- attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+ attrs |= DMA_ATTR_MMIO;
pfns[idx] |= HMM_PFN_P2PDMA;
break;
case PCI_P2PDMA_MAP_BUS_ADDR:
@@ -835,8 +835,8 @@ dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map,
if (WARN_ON_ONCE(dma_need_unmap(dev) && !dma_addrs))
goto error;
- dma_addr = dma_map_page(dev, page, 0, map->dma_entry_size,
- DMA_BIDIRECTIONAL);
+ dma_addr = dma_map_phys(dev, paddr, map->dma_entry_size,
+ DMA_BIDIRECTIONAL, attrs);
if (dma_mapping_error(dev, dma_addr))
goto error;
@@ -871,16 +871,17 @@ bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx)
if ((pfns[idx] & valid_dma) != valid_dma)
return false;
+ if (pfns[idx] & HMM_PFN_P2PDMA)
+ attrs |= DMA_ATTR_MMIO;
+
if (pfns[idx] & HMM_PFN_P2PDMA_BUS)
; /* no need to unmap bus address P2P mappings */
- else if (dma_use_iova(state)) {
- if (pfns[idx] & HMM_PFN_P2PDMA)
- attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+ else if (dma_use_iova(state))
dma_iova_unlink(dev, state, idx * map->dma_entry_size,
map->dma_entry_size, DMA_BIDIRECTIONAL, attrs);
- } else if (dma_need_unmap(dev))
- dma_unmap_page(dev, dma_addrs[idx], map->dma_entry_size,
- DMA_BIDIRECTIONAL);
+ else if (dma_need_unmap(dev))
+ dma_unmap_phys(dev, dma_addrs[idx], map->dma_entry_size,
+ DMA_BIDIRECTIONAL, attrs);
pfns[idx] &=
~(HMM_PFN_DMA_MAPPED | HMM_PFN_P2PDMA | HMM_PFN_P2PDMA_BUS);
diff --git a/mm/kmsan/hooks.c b/mm/kmsan/hooks.c
index 97de3d6194f0..90bee565b9bc 100644
--- a/mm/kmsan/hooks.c
+++ b/mm/kmsan/hooks.c
@@ -336,14 +336,16 @@ static void kmsan_handle_dma_page(const void *addr, size_t size,
}
/* Helper function to handle DMA data transfers. */
-void kmsan_handle_dma(struct page *page, size_t offset, size_t size,
+void kmsan_handle_dma(phys_addr_t phys, size_t size,
enum dma_data_direction dir)
{
- u64 page_offset, to_go, addr;
+ struct page *page = phys_to_page(phys);
+ u64 page_offset, to_go;
+ void *addr;
- if (PageHighMem(page))
+ if (PhysHighMem(phys))
return;
- addr = (u64)page_address(page) + offset;
+ addr = page_to_virt(page);
/*
* The kernel may occasionally give us adjacent DMA pages not belonging
* to the same allocation. Process them separately to avoid triggering
@@ -366,8 +368,7 @@ void kmsan_handle_dma_sg(struct scatterlist *sg, int nents,
int i;
for_each_sg(sg, item, nents, i)
- kmsan_handle_dma(sg_page(item), item->offset, item->length,
- dir);
+ kmsan_handle_dma(sg_phys(item), item->length, dir);
}
/* Functions from kmsan-checks.h follow. */
diff --git a/mm/memblock.c b/mm/memblock.c
index 117d963e677c..120a501a887a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1445,70 +1445,6 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
return 0;
}
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-/**
- * __next_mem_pfn_range_in_zone - iterator for for_each_*_range_in_zone()
- *
- * @idx: pointer to u64 loop variable
- * @zone: zone in which all of the memory blocks reside
- * @out_spfn: ptr to ulong for start pfn of the range, can be %NULL
- * @out_epfn: ptr to ulong for end pfn of the range, can be %NULL
- *
- * This function is meant to be a zone/pfn specific wrapper for the
- * for_each_mem_range type iterators. Specifically they are used in the
- * deferred memory init routines and as such we were duplicating much of
- * this logic throughout the code. So instead of having it in multiple
- * locations it seemed like it would make more sense to centralize this to
- * one new iterator that does everything they need.
- */
-void __init_memblock
-__next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
- unsigned long *out_spfn, unsigned long *out_epfn)
-{
- int zone_nid = zone_to_nid(zone);
- phys_addr_t spa, epa;
-
- __next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
- &memblock.memory, &memblock.reserved,
- &spa, &epa, NULL);
-
- while (*idx != U64_MAX) {
- unsigned long epfn = PFN_DOWN(epa);
- unsigned long spfn = PFN_UP(spa);
-
- /*
- * Verify the end is at least past the start of the zone and
- * that we have at least one PFN to initialize.
- */
- if (zone->zone_start_pfn < epfn && spfn < epfn) {
- /* if we went too far just stop searching */
- if (zone_end_pfn(zone) <= spfn) {
- *idx = U64_MAX;
- break;
- }
-
- if (out_spfn)
- *out_spfn = max(zone->zone_start_pfn, spfn);
- if (out_epfn)
- *out_epfn = min(zone_end_pfn(zone), epfn);
-
- return;
- }
-
- __next_mem_range(idx, zone_nid, MEMBLOCK_NONE,
- &memblock.memory, &memblock.reserved,
- &spa, &epa, NULL);
- }
-
- /* signal end of iteration */
- if (out_spfn)
- *out_spfn = ULONG_MAX;
- if (out_epfn)
- *out_epfn = 0;
-}
-
-#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
-
/**
* memblock_alloc_range_nid - allocate boot memory block
* @size: size of memory block to be allocated in bytes
diff --git a/mm/mm_init.c b/mm/mm_init.c
index df614556741a..3db2dea7db4c 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2045,112 +2045,63 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
}
/*
- * This function is meant to pre-load the iterator for the zone init from
- * a given point.
- * Specifically it walks through the ranges starting with initial index
- * passed to it until we are caught up to the first_init_pfn value and
- * exits there. If we never encounter the value we return false indicating
- * there are no valid ranges left.
- */
-static bool __init
-deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
- unsigned long *spfn, unsigned long *epfn,
- unsigned long first_init_pfn)
-{
- u64 j = *i;
-
- if (j == 0)
- __next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
-
- /*
- * Start out by walking through the ranges in this zone that have
- * already been initialized. We don't need to do anything with them
- * so we just need to flush them out of the system.
- */
- for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
- if (*epfn <= first_init_pfn)
- continue;
- if (*spfn < first_init_pfn)
- *spfn = first_init_pfn;
- *i = j;
- return true;
- }
-
- return false;
-}
-
-/*
- * Initialize and free pages. We do it in two loops: first we initialize
- * struct page, then free to buddy allocator, because while we are
- * freeing pages we can access pages that are ahead (computing buddy
- * page in __free_one_page()).
+ * Initialize and free pages.
+ *
+ * At this point reserved pages and struct pages that correspond to holes in
+ * memblock.memory are already intialized so every free range has a valid
+ * memory map around it.
+ * This ensures that access of pages that are ahead of the range being
+ * initialized (computing buddy page in __free_one_page()) always reads a valid
+ * struct page.
*
- * In order to try and keep some memory in the cache we have the loop
- * broken along max page order boundaries. This way we will not cause
- * any issues with the buddy page computation.
+ * In order to try and improve CPU cache locality we have the loop broken along
+ * max page order boundaries.
*/
static unsigned long __init
-deferred_init_maxorder(u64 *i, struct zone *zone, unsigned long *start_pfn,
- unsigned long *end_pfn)
+deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
+ struct zone *zone)
{
- unsigned long mo_pfn = ALIGN(*start_pfn + 1, MAX_ORDER_NR_PAGES);
- unsigned long spfn = *start_pfn, epfn = *end_pfn;
+ int nid = zone_to_nid(zone);
unsigned long nr_pages = 0;
- u64 j = *i;
-
- /* First we loop through and initialize the page values */
- for_each_free_mem_pfn_range_in_zone_from(j, zone, start_pfn, end_pfn) {
- unsigned long t;
-
- if (mo_pfn <= *start_pfn)
- break;
+ phys_addr_t start, end;
+ u64 i = 0;
- t = min(mo_pfn, *end_pfn);
- nr_pages += deferred_init_pages(zone, *start_pfn, t);
+ for_each_free_mem_range(i, nid, 0, &start, &end, NULL) {
+ unsigned long spfn = PFN_UP(start);
+ unsigned long epfn = PFN_DOWN(end);
- if (mo_pfn < *end_pfn) {
- *start_pfn = mo_pfn;
+ if (spfn >= end_pfn)
break;
- }
- }
- /* Reset values and now loop through freeing pages as needed */
- swap(j, *i);
+ spfn = max(spfn, start_pfn);
+ epfn = min(epfn, end_pfn);
- for_each_free_mem_pfn_range_in_zone_from(j, zone, &spfn, &epfn) {
- unsigned long t;
+ while (spfn < epfn) {
+ unsigned long mo_pfn = ALIGN(spfn + 1, MAX_ORDER_NR_PAGES);
+ unsigned long chunk_end = min(mo_pfn, epfn);
- if (mo_pfn <= spfn)
- break;
+ nr_pages += deferred_init_pages(zone, spfn, chunk_end);
+ deferred_free_pages(spfn, chunk_end - spfn);
- t = min(mo_pfn, epfn);
- deferred_free_pages(spfn, t - spfn);
+ spfn = chunk_end;
- if (mo_pfn <= epfn)
- break;
+ if (irqs_disabled())
+ touch_nmi_watchdog();
+ else
+ cond_resched();
+ }
}
return nr_pages;
}
static void __init
-deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
- void *arg)
+deferred_init_memmap_job(unsigned long start_pfn, unsigned long end_pfn,
+ void *arg)
{
- unsigned long spfn, epfn;
struct zone *zone = arg;
- u64 i = 0;
-
- deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
- /*
- * Initialize and free pages in MAX_PAGE_ORDER sized increments so that
- * we can avoid introducing any issues with the buddy allocator.
- */
- while (spfn < end_pfn) {
- deferred_init_maxorder(&i, zone, &spfn, &epfn);
- cond_resched();
- }
+ deferred_init_memmap_chunk(start_pfn, end_pfn, zone);
}
static unsigned int __init
@@ -2164,12 +2115,10 @@ static int __init deferred_init_memmap(void *data)
{
pg_data_t *pgdat = data;
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
- unsigned long spfn = 0, epfn = 0;
- unsigned long first_init_pfn, flags;
+ int max_threads = deferred_page_init_max_threads(cpumask);
+ unsigned long first_init_pfn, last_pfn, flags;
unsigned long start = jiffies;
struct zone *zone;
- int max_threads;
- u64 i = 0;
/* Bind memory initialisation thread to a local node if possible */
if (!cpumask_empty(cpumask))
@@ -2197,24 +2146,20 @@ static int __init deferred_init_memmap(void *data)
/* Only the highest zone is deferred */
zone = pgdat->node_zones + pgdat->nr_zones - 1;
+ last_pfn = SECTION_ALIGN_UP(zone_end_pfn(zone));
- max_threads = deferred_page_init_max_threads(cpumask);
+ struct padata_mt_job job = {
+ .thread_fn = deferred_init_memmap_job,
+ .fn_arg = zone,
+ .start = first_init_pfn,
+ .size = last_pfn - first_init_pfn,
+ .align = PAGES_PER_SECTION,
+ .min_chunk = PAGES_PER_SECTION,
+ .max_threads = max_threads,
+ .numa_aware = false,
+ };
- while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
- first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
- struct padata_mt_job job = {
- .thread_fn = deferred_init_memmap_chunk,
- .fn_arg = zone,
- .start = spfn,
- .size = first_init_pfn - spfn,
- .align = PAGES_PER_SECTION,
- .min_chunk = PAGES_PER_SECTION,
- .max_threads = max_threads,
- .numa_aware = false,
- };
-
- padata_do_multithreaded(&job);
- }
+ padata_do_multithreaded(&job);
/* Sanity check that the next zone really is unpopulated */
WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone));
@@ -2239,12 +2184,11 @@ static int __init deferred_init_memmap(void *data)
*/
bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
{
- unsigned long nr_pages_needed = ALIGN(1 << order, PAGES_PER_SECTION);
+ unsigned long nr_pages_needed = SECTION_ALIGN_UP(1 << order);
pg_data_t *pgdat = zone->zone_pgdat;
unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
unsigned long spfn, epfn, flags;
unsigned long nr_pages = 0;
- u64 i = 0;
/* Only the last zone may have deferred pages */
if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
@@ -2261,37 +2205,26 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
return true;
}
- /* If the zone is empty somebody else may have cleared out the zone */
- if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
- first_deferred_pfn)) {
- pgdat->first_deferred_pfn = ULONG_MAX;
- pgdat_resize_unlock(pgdat, &flags);
- /* Retry only once. */
- return first_deferred_pfn != ULONG_MAX;
+ /*
+ * Initialize at least nr_pages_needed in section chunks.
+ * If a section has less free memory than nr_pages_needed, the next
+ * section will be also initialized.
+ * Note, that it still does not guarantee that allocation of order can
+ * be satisfied if the sections are fragmented because of memblock
+ * allocations.
+ */
+ for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1);
+ nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone);
+ spfn = epfn, epfn += PAGES_PER_SECTION) {
+ nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone);
}
/*
- * Initialize and free pages in MAX_PAGE_ORDER sized increments so
- * that we can avoid introducing any issues with the buddy
- * allocator.
+ * There were no pages to initialize and free which means the zone's
+ * memory map is completely initialized.
*/
- while (spfn < epfn) {
- /* update our first deferred PFN for this section */
- first_deferred_pfn = spfn;
-
- nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
- touch_nmi_watchdog();
-
- /* We should only stop along section boundaries */
- if ((first_deferred_pfn ^ spfn) < PAGES_PER_SECTION)
- continue;
-
- /* If our quota has been met we can stop here */
- if (nr_pages >= nr_pages_needed)
- break;
- }
+ pgdat->first_deferred_pfn = nr_pages ? spfn : ULONG_MAX;
- pgdat->first_deferred_pfn = spfn;
pgdat_resize_unlock(pgdat, &flags);
return nr_pages > 0;