summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 12:47:20 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-26 12:47:20 -0700
commita0433f8cae3ac51f59b4b1863032822aaa2d8164 (patch)
tree9eb7b096aa9f7fa53921e6ff247488f3a55471f5 /mm
parent0aa69d53ac7c30f6184f88f2e310d808b32b35a5 (diff)
parentfcaa174a9c995cf0af3967e55644a1543ea07e36 (diff)
Merge tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux
Pull block updates from Jens Axboe: - NVMe pull request via Keith: - Various cleanups all around (Irvin, Chaitanya, Christophe) - Better struct packing (Christophe JAILLET) - Reduce controller error logs for optional commands (Keith) - Support for >=64KiB block sizes (Daniel Gomez) - Fabrics fixes and code organization (Max, Chaitanya, Daniel Wagner) - bcache updates via Coly: - Fix a race at init time (Mingzhe Zou) - Misc fixes and cleanups (Andrea, Thomas, Zheng, Ye) - use page pinning in the block layer for dio (David) - convert old block dio code to page pinning (David, Christoph) - cleanups for pktcdvd (Andy) - cleanups for rnbd (Guoqing) - use the unchecked __bio_add_page() for the initial single page additions (Johannes) - fix overflows in the Amiga partition handling code (Michael) - improve mq-deadline zoned device support (Bart) - keep passthrough requests out of the IO schedulers (Christoph, Ming) - improve support for flush requests, making them less special to deal with (Christoph) - add bdev holder ops and shutdown methods (Christoph) - fix the name_to_dev_t() situation and use cases (Christoph) - decouple the block open flags from fmode_t (Christoph) - ublk updates and cleanups, including adding user copy support (Ming) - BFQ sanity checking (Bart) - convert brd from radix to xarray (Pankaj) - constify various structures (Thomas, Ivan) - more fine grained persistent reservation ioctl capability checks (Jingbo) - misc fixes and cleanups (Arnd, Azeem, Demi, Ed, Hengqi, Hou, Jan, Jordy, Li, Min, Yu, Zhong, Waiman) * tag 'for-6.5/block-2023-06-23' of git://git.kernel.dk/linux: (266 commits) scsi/sg: don't grab scsi host module reference ext4: Fix warning in blkdev_put() block: don't return -EINVAL for not found names in devt_from_devname cdrom: Fix spectre-v1 gadget block: Improve kernel-doc headers blk-mq: don't insert passthrough request into sw queue bsg: make bsg_class a static const structure ublk: make ublk_chr_class a static const structure aoe: make aoe_class a static const structure block/rnbd: make all 'class' structures const block: fix the exclusive open mask in disk_scan_partitions block: add overflow checks for Amiga partition support block: change all __u32 annotations to __be32 in affs_hardblocks.h block: fix signed int overflow in Amiga partition support block: add capacity validation in bdev_add_partition() block: fine-granular CAP_SYS_ADMIN for Persistent Reservation block: disallow Persistent Reservation on partitions reiserfs: fix blkdev_put() warning from release_journal_dev() block: fix wrong mode for blkdev_get_by_dev() from disk_scan_partitions() block: document the holder argument to blkdev_get_by_path ...
Diffstat (limited to 'mm')
-rw-r--r--mm/gup.c58
-rw-r--r--mm/page_io.c8
-rw-r--r--mm/swapfile.c6
3 files changed, 64 insertions, 8 deletions
diff --git a/mm/gup.c b/mm/gup.c
index bbe416236593..0814576b7366 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -51,7 +51,8 @@ static inline void sanity_check_pinned_pages(struct page **pages,
struct page *page = *pages;
struct folio *folio = page_folio(page);
- if (!folio_test_anon(folio))
+ if (is_zero_page(page) ||
+ !folio_test_anon(folio))
continue;
if (!folio_test_large(folio) || folio_test_hugetlb(folio))
VM_BUG_ON_PAGE(!PageAnonExclusive(&folio->page), page);
@@ -132,6 +133,13 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
struct folio *folio;
/*
+ * Don't take a pin on the zero page - it's not going anywhere
+ * and it is used in a *lot* of places.
+ */
+ if (is_zero_page(page))
+ return page_folio(page);
+
+ /*
* Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
* right zone, so fail and let the caller fall back to the slow
* path.
@@ -180,6 +188,8 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
{
if (flags & FOLL_PIN) {
+ if (is_zero_folio(folio))
+ return;
node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
if (folio_test_large(folio))
atomic_sub(refs, &folio->_pincount);
@@ -225,6 +235,13 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
folio_ref_inc(folio);
else if (flags & FOLL_PIN) {
/*
+ * Don't take a pin on the zero page - it's not going anywhere
+ * and it is used in a *lot* of places.
+ */
+ if (is_zero_page(page))
+ return 0;
+
+ /*
* Similar to try_grab_folio(): be sure to *also*
* increment the normal page refcount field at least once,
* so that the page really is pinned.
@@ -258,6 +275,33 @@ void unpin_user_page(struct page *page)
}
EXPORT_SYMBOL(unpin_user_page);
+/**
+ * folio_add_pin - Try to get an additional pin on a pinned folio
+ * @folio: The folio to be pinned
+ *
+ * Get an additional pin on a folio we already have a pin on. Makes no change
+ * if the folio is a zero_page.
+ */
+void folio_add_pin(struct folio *folio)
+{
+ if (is_zero_folio(folio))
+ return;
+
+ /*
+ * Similar to try_grab_folio(): be sure to *also* increment the normal
+ * page refcount field at least once, so that the page really is
+ * pinned.
+ */
+ if (folio_test_large(folio)) {
+ WARN_ON_ONCE(atomic_read(&folio->_pincount) < 1);
+ folio_ref_inc(folio);
+ atomic_inc(&folio->_pincount);
+ } else {
+ WARN_ON_ONCE(folio_ref_count(folio) < GUP_PIN_COUNTING_BIAS);
+ folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+ }
+}
+
static inline struct folio *gup_folio_range_next(struct page *start,
unsigned long npages, unsigned long i, unsigned int *ntails)
{
@@ -3079,6 +3123,9 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast);
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for further details.
+ *
+ * Note that if a zero_page is amongst the returned pages, it will not have
+ * pins in it and unpin_user_page() will not remove pins from it.
*/
int pin_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages)
@@ -3110,6 +3157,9 @@ EXPORT_SYMBOL_GPL(pin_user_pages_fast);
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for details.
+ *
+ * Note that if a zero_page is amongst the returned pages, it will not have
+ * pins in it and unpin_user_page*() will not remove pins from it.
*/
long pin_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
@@ -3143,6 +3193,9 @@ EXPORT_SYMBOL(pin_user_pages_remote);
*
* FOLL_PIN means that the pages must be released via unpin_user_page(). Please
* see Documentation/core-api/pin_user_pages.rst for details.
+ *
+ * Note that if a zero_page is amongst the returned pages, it will not have
+ * pins in it and unpin_user_page*() will not remove pins from it.
*/
long pin_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
@@ -3161,6 +3214,9 @@ EXPORT_SYMBOL(pin_user_pages);
* pin_user_pages_unlocked() is the FOLL_PIN variant of
* get_user_pages_unlocked(). Behavior is the same, except that this one sets
* FOLL_PIN and rejects FOLL_GET.
+ *
+ * Note that if a zero_page is amongst the returned pages, it will not have
+ * pins in it and unpin_user_page*() will not remove pins from it.
*/
long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags)
diff --git a/mm/page_io.c b/mm/page_io.c
index 87b682d18850..684cd3c7b59b 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -338,7 +338,7 @@ static void swap_writepage_bdev_sync(struct page *page,
bio_init(&bio, sis->bdev, &bv, 1,
REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc));
bio.bi_iter.bi_sector = swap_page_sector(page);
- bio_add_page(&bio, page, thp_size(page), 0);
+ __bio_add_page(&bio, page, thp_size(page), 0);
bio_associate_blkg_from_page(&bio, page);
count_swpout_vm_event(page);
@@ -360,7 +360,7 @@ static void swap_writepage_bdev_async(struct page *page,
GFP_NOIO);
bio->bi_iter.bi_sector = swap_page_sector(page);
bio->bi_end_io = end_swap_bio_write;
- bio_add_page(bio, page, thp_size(page), 0);
+ __bio_add_page(bio, page, thp_size(page), 0);
bio_associate_blkg_from_page(bio, page);
count_swpout_vm_event(page);
@@ -468,7 +468,7 @@ static void swap_readpage_bdev_sync(struct page *page,
bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ);
bio.bi_iter.bi_sector = swap_page_sector(page);
- bio_add_page(&bio, page, thp_size(page), 0);
+ __bio_add_page(&bio, page, thp_size(page), 0);
/*
* Keep this task valid during swap readpage because the oom killer may
* attempt to access it in the page fault retry time check.
@@ -488,7 +488,7 @@ static void swap_readpage_bdev_async(struct page *page,
bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
bio->bi_iter.bi_sector = swap_page_sector(page);
bio->bi_end_io = end_swap_bio_read;
- bio_add_page(bio, page, thp_size(page), 0);
+ __bio_add_page(bio, page, thp_size(page), 0);
count_vm_event(PSWPIN);
submit_bio(bio);
}
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 274bbf797480..6bc83060df9a 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2539,7 +2539,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
struct block_device *bdev = I_BDEV(inode);
set_blocksize(bdev, old_block_size);
- blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ blkdev_put(bdev, p);
}
inode_lock(inode);
@@ -2770,7 +2770,7 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
if (S_ISBLK(inode->i_mode)) {
p->bdev = blkdev_get_by_dev(inode->i_rdev,
- FMODE_READ | FMODE_WRITE | FMODE_EXCL, p);
+ BLK_OPEN_READ | BLK_OPEN_WRITE, p, NULL);
if (IS_ERR(p->bdev)) {
error = PTR_ERR(p->bdev);
p->bdev = NULL;
@@ -3221,7 +3221,7 @@ bad_swap:
p->cluster_next_cpu = NULL;
if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
set_blocksize(p->bdev, p->old_block_size);
- blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ blkdev_put(p->bdev, p);
}
inode = NULL;
destroy_swap_extents(p);