summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-29 17:29:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-29 17:29:11 -0700
commit65090f30ab791810a3dc840317e57df05018559c (patch)
treef417526656da37109777e89613e140ffc59228bc /fs
parent349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830 (diff)
parent0ed950d1f28142ccd9a9453c60df87853530d778 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: "191 patches. Subsystems affected by this patch series: kthread, ia64, scripts, ntfs, squashfs, ocfs2, kernel/watchdog, and mm (gup, pagealloc, slab, slub, kmemleak, dax, debug, pagecache, gup, swap, memcg, pagemap, mprotect, bootmem, dma, tracing, vmalloc, kasan, initialization, pagealloc, and memory-failure)" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (191 commits) mm,hwpoison: make get_hwpoison_page() call get_any_page() mm,hwpoison: send SIGBUS with error virutal address mm/page_alloc: split pcp->high across all online CPUs for cpuless nodes mm/page_alloc: allow high-order pages to be stored on the per-cpu lists mm: replace CONFIG_FLAT_NODE_MEM_MAP with CONFIG_FLATMEM mm: replace CONFIG_NEED_MULTIPLE_NODES with CONFIG_NUMA docs: remove description of DISCONTIGMEM arch, mm: remove stale mentions of DISCONIGMEM mm: remove CONFIG_DISCONTIGMEM m68k: remove support for DISCONTIGMEM arc: remove support for DISCONTIGMEM arc: update comment about HIGHMEM implementation alpha: remove DISCONTIGMEM and NUMA mm/page_alloc: move free_the_page mm/page_alloc: fix counting of managed_pages mm/page_alloc: improve memmap_pages dbg msg mm: drop SECTION_SHIFT in code comments mm/page_alloc: introduce vm.percpu_pagelist_high_fraction mm/page_alloc: limit the number of pages on PCP lists when reclaim is active mm/page_alloc: scale the number of pages that are batch freed ...
Diffstat (limited to 'fs')
-rw-r--r--fs/adfs/inode.c1
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/bfs/file.c1
-rw-r--r--fs/binfmt_aout.c4
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/binfmt_elf_fdpic.c11
-rw-r--r--fs/binfmt_flat.c2
-rw-r--r--fs/block_dev.c1
-rw-r--r--fs/buffer.c25
-rw-r--r--fs/configfs/inode.c8
-rw-r--r--fs/dax.c3
-rw-r--r--fs/ecryptfs/mmap.c13
-rw-r--r--fs/exfat/inode.c1
-rw-r--r--fs/ext2/inode.c4
-rw-r--r--fs/ext4/inode.c2
-rw-r--r--fs/fat/inode.c1
-rw-r--r--fs/fs-writeback.c332
-rw-r--r--fs/fuse/dax.c3
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/hfs/inode.c2
-rw-r--r--fs/hfsplus/inode.c2
-rw-r--r--fs/hpfs/file.c1
-rw-r--r--fs/iomap/buffered-io.c27
-rw-r--r--fs/jfs/inode.c1
-rw-r--r--fs/kernfs/inode.c8
-rw-r--r--fs/libfs.c44
-rw-r--r--fs/minix/inode.c1
-rw-r--r--fs/nilfs2/mdt.c1
-rw-r--r--fs/ntfs/inode.c2
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/cluster/heartbeat.c7
-rw-r--r--fs/ocfs2/cluster/nodemanager.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c2
-rw-r--r--fs/ocfs2/filecheck.c6
-rw-r--r--fs/ocfs2/stackglue.c8
-rw-r--r--fs/omfs/file.c1
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/ramfs/inode.c9
-rw-r--r--fs/squashfs/block.c5
-rw-r--r--fs/squashfs/squashfs_fs_sb.h1
-rw-r--r--fs/squashfs/super.c86
-rw-r--r--fs/sysv/itree.c1
-rw-r--r--fs/udf/file.c1
-rw-r--r--fs/udf/inode.c1
-rw-r--r--fs/ufs/inode.c1
-rw-r--r--fs/xfs/xfs_aops.c4
-rw-r--r--fs/zonefs/super.c4
48 files changed, 410 insertions, 244 deletions
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index fb7ee026d101..adbb3a1edcbf 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -73,6 +73,7 @@ static sector_t _adfs_bmap(struct address_space *mapping, sector_t block)
}
static const struct address_space_operations adfs_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = adfs_readpage,
.writepage = adfs_writepage,
.write_begin = adfs_write_begin,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index d91b0133d95d..75ebd2b576ca 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -453,6 +453,7 @@ static sector_t _affs_bmap(struct address_space *mapping, sector_t block)
}
const struct address_space_operations affs_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = affs_readpage,
.writepage = affs_writepage,
.write_begin = affs_write_begin,
@@ -833,6 +834,7 @@ err_bh:
}
const struct address_space_operations affs_aops_ofs = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = affs_readpage_ofs,
//.writepage = affs_writepage_ofs,
.write_begin = affs_write_begin_ofs,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index 0dceefc54b48..7f8544abf636 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -188,6 +188,7 @@ static sector_t bfs_bmap(struct address_space *mapping, sector_t block)
}
const struct address_space_operations bfs_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = bfs_readpage,
.writepage = bfs_writepage,
.write_begin = bfs_write_begin,
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 3e84e9bb9084..145917f734fe 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -222,7 +222,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text,
PROT_READ | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
+ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
fd_offset);
if (error != N_TXTADDR(ex))
@@ -230,7 +230,7 @@ static int load_aout_binary(struct linux_binprm * bprm)
error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data,
PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE,
+ MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
fd_offset + ex.a_text);
if (error != N_DATADDR(ex))
return error;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3d73cbb439fa..439ed81e755a 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1070,7 +1070,7 @@ out_free_interp:
elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
!!interpreter, false);
- elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
+ elf_flags = MAP_PRIVATE | MAP_DENYWRITE;
vaddr = elf_ppnt->p_vaddr;
/*
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index ab9c31ddffda..cf4028487dcc 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -928,7 +928,7 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
{
struct elf32_fdpic_loadseg *seg;
struct elf32_phdr *phdr;
- unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags;
+ unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0;
int loop, ret;
load_addr = params->load_addr;
@@ -948,12 +948,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
}
/* allocate one big anon block for everything */
- mflags = MAP_PRIVATE;
- if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
- mflags |= MAP_EXECUTABLE;
-
maddr = vm_mmap(NULL, load_addr, top - base,
- PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0);
+ PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, 0);
if (IS_ERR_VALUE(maddr))
return (int) maddr;
@@ -1046,9 +1042,6 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params,
if (phdr->p_flags & PF_X) prot |= PROT_EXEC;
flags = MAP_PRIVATE | MAP_DENYWRITE;
- if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE)
- flags |= MAP_EXECUTABLE;
-
maddr = 0;
switch (params->flags & ELF_FDPIC_FLAG_ARRANGEMENT) {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a1072c6a2341..5d776f80ee50 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -573,7 +573,7 @@ static int load_flat_file(struct linux_binprm *bprm,
pr_debug("ROM mapping of file (we hope)\n");
textpos = vm_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC,
- MAP_PRIVATE|MAP_EXECUTABLE, 0);
+ MAP_PRIVATE, 0);
if (!textpos || IS_ERR_VALUE(textpos)) {
ret = textpos;
if (!textpos)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 6cc4d4cfe0c2..eb34f5c357cf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1754,6 +1754,7 @@ static int blkdev_writepages(struct address_space *mapping,
}
static const struct address_space_operations def_blk_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = blkdev_readpage,
.readahead = blkdev_readahead,
.writepage = blkdev_writepage,
diff --git a/fs/buffer.c b/fs/buffer.c
index ea48c01fb76b..6290c3afdba4 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -589,31 +589,6 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
EXPORT_SYMBOL(mark_buffer_dirty_inode);
/*
- * Mark the page dirty, and set it dirty in the page cache, and mark the inode
- * dirty.
- *
- * If warn is true, then emit a warning if the page is not uptodate and has
- * not been truncated.
- *
- * The caller must hold lock_page_memcg().
- */
-void __set_page_dirty(struct page *page, struct address_space *mapping,
- int warn)
-{
- unsigned long flags;
-
- xa_lock_irqsave(&mapping->i_pages, flags);
- if (page->mapping) { /* Race with truncate? */
- WARN_ON_ONCE(warn && !PageUptodate(page));
- account_page_dirtied(page, mapping);
- __xa_set_mark(&mapping->i_pages, page_index(page),
- PAGECACHE_TAG_DIRTY);
- }
- xa_unlock_irqrestore(&mapping->i_pages, flags);
-}
-EXPORT_SYMBOL_GPL(__set_page_dirty);
-
-/*
* Add a page to the dirty page list.
*
* It is a sad fact of life that this function is called from several places
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index eb5ec3e46283..b601610e9907 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -28,12 +28,6 @@
static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
#endif
-static const struct address_space_operations configfs_aops = {
- .readpage = simple_readpage,
- .write_begin = simple_write_begin,
- .write_end = simple_write_end,
-};
-
static const struct inode_operations configfs_inode_operations ={
.setattr = configfs_setattr,
};
@@ -114,7 +108,7 @@ struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd,
struct inode * inode = new_inode(s);
if (inode) {
inode->i_ino = get_next_ino();
- inode->i_mapping->a_ops = &configfs_aops;
+ inode->i_mapping->a_ops = &ram_aops;
inode->i_op = &configfs_inode_operations;
if (sd->s_iattr) {
diff --git a/fs/dax.c b/fs/dax.c
index 62352cbcf0f4..da41f9363568 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -488,10 +488,11 @@ static void *grab_mapping_entry(struct xa_state *xas,
struct address_space *mapping, unsigned int order)
{
unsigned long index = xas->xa_index;
- bool pmd_downgrade = false; /* splitting PMD entry into PTE entries? */
+ bool pmd_downgrade; /* splitting PMD entry into PTE entries? */
void *entry;
retry:
+ pmd_downgrade = false;
xas_lock_irq(xas);
entry = get_unlocked_entry(xas, order);
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 392e721b50a3..7d85e64ea62f 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -533,7 +533,20 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block)
return block;
}
+#include <linux/buffer_head.h>
+
const struct address_space_operations ecryptfs_aops = {
+ /*
+ * XXX: This is pretty broken for multiple reasons: ecryptfs does not
+ * actually use buffer_heads, and ecryptfs will crash without
+ * CONFIG_BLOCK. But it matches the behavior before the default for
+ * address_space_operations without the ->set_page_dirty method was
+ * cleaned up, so this is the best we can do without maintainer
+ * feedback.
+ */
+#ifdef CONFIG_BLOCK
+ .set_page_dirty = __set_page_dirty_buffers,
+#endif
.writepage = ecryptfs_writepage,
.readpage = ecryptfs_readpage,
.write_begin = ecryptfs_write_begin,
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 1803ef3220fd..ca37d4344361 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -491,6 +491,7 @@ int exfat_block_truncate_page(struct inode *inode, loff_t from)
}
static const struct address_space_operations exfat_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = exfat_readpage,
.readahead = exfat_readahead,
.writepage = exfat_writepage,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 68178b2234bd..dadb121beb22 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -961,6 +961,7 @@ ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc
}
const struct address_space_operations ext2_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = ext2_readpage,
.readahead = ext2_readahead,
.writepage = ext2_writepage,
@@ -975,6 +976,7 @@ const struct address_space_operations ext2_aops = {
};
const struct address_space_operations ext2_nobh_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = ext2_readpage,
.readahead = ext2_readahead,
.writepage = ext2_nobh_writepage,
@@ -990,7 +992,7 @@ const struct address_space_operations ext2_nobh_aops = {
static const struct address_space_operations ext2_dax_aops = {
.writepages = ext2_dax_writepages,
.direct_IO = noop_direct_IO,
- .set_page_dirty = noop_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_no_writeback,
.invalidatepage = noop_invalidatepage,
};
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index fe6045a46599..b8170a008590 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3701,7 +3701,7 @@ static const struct address_space_operations ext4_da_aops = {
static const struct address_space_operations ext4_dax_aops = {
.writepages = ext4_dax_writepages,
.direct_IO = noop_direct_IO,
- .set_page_dirty = noop_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_no_writeback,
.bmap = ext4_bmap,
.invalidatepage = noop_invalidatepage,
.swap_activate = ext4_iomap_swap_activate,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index bab9b202b496..de0c9b013a85 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -342,6 +342,7 @@ int fat_block_truncate_page(struct inode *inode, loff_t from)
}
static const struct address_space_operations fat_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = fat_readpage,
.readahead = fat_readahead,
.writepage = fat_writepage,
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e91980f49388..62193106683d 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -131,25 +131,6 @@ static bool inode_io_list_move_locked(struct inode *inode,
return false;
}
-/**
- * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
- * @inode: inode to be removed
- * @wb: bdi_writeback @inode is being removed from
- *
- * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
- * clear %WB_has_dirty_io if all are empty afterwards.
- */
-static void inode_io_list_del_locked(struct inode *inode,
- struct bdi_writeback *wb)
-{
- assert_spin_locked(&wb->list_lock);
- assert_spin_locked(&inode->i_lock);
-
- inode->i_state &= ~I_SYNC_QUEUED;
- list_del_init(&inode->i_io_list);
- wb_io_lists_depopulated(wb);
-}
-
static void wb_wakeup(struct bdi_writeback *wb)
{
spin_lock_bh(&wb->work_lock);
@@ -244,6 +225,13 @@ void wb_wait_for_completion(struct wb_completion *done)
/* one round can affect upto 5 slots */
#define WB_FRN_MAX_IN_FLIGHT 1024 /* don't queue too many concurrently */
+/*
+ * Maximum inodes per isw. A specific value has been chosen to make
+ * struct inode_switch_wbs_context fit into 1024 bytes kmalloc.
+ */
+#define WB_MAX_INODES_PER_ISW ((1024UL - sizeof(struct inode_switch_wbs_context)) \
+ / sizeof(struct inode *))
+
static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
static struct workqueue_struct *isw_wq;
@@ -279,6 +267,28 @@ void __inode_attach_wb(struct inode *inode, struct page *page)
EXPORT_SYMBOL_GPL(__inode_attach_wb);
/**
+ * inode_cgwb_move_to_attached - put the inode onto wb->b_attached list
+ * @inode: inode of interest with i_lock held
+ * @wb: target bdi_writeback
+ *
+ * Remove the inode from wb's io lists and if necessarily put onto b_attached
+ * list. Only inodes attached to cgwb's are kept on this list.
+ */
+static void inode_cgwb_move_to_attached(struct inode *inode,
+ struct bdi_writeback *wb)
+{
+ assert_spin_locked(&wb->list_lock);
+ assert_spin_locked(&inode->i_lock);
+
+ inode->i_state &= ~I_SYNC_QUEUED;
+ if (wb != &wb->bdi->wb)
+ list_move(&inode->i_io_list, &wb->b_attached);
+ else
+ list_del_init(&inode->i_io_list);
+ wb_io_lists_depopulated(wb);
+}
+
+/**
* locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
* @inode: inode of interest with i_lock held
*
@@ -332,11 +342,18 @@ static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
}
struct inode_switch_wbs_context {
- struct inode *inode;
- struct bdi_writeback *new_wb;
+ struct rcu_work work;
- struct rcu_head rcu_head;
- struct work_struct work;
+ /*
+ * Multiple inodes can be switched at once. The switching procedure
+ * consists of two parts, separated by a RCU grace period. To make
+ * sure that the second part is executed for each inode gone through
+ * the first part, all inode pointers are placed into a NULL-terminated
+ * array embedded into struct inode_switch_wbs_context. Otherwise
+ * an inode could be left in a non-consistent state.
+ */
+ struct bdi_writeback *new_wb;
+ struct inode *inodes[];
};
static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
@@ -349,50 +366,23 @@ static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
up_write(&bdi->wb_switch_rwsem);
}
-static void inode_switch_wbs_work_fn(struct work_struct *work)
+static bool inode_do_switch_wbs(struct inode *inode,
+ struct bdi_writeback *old_wb,
+ struct bdi_writeback *new_wb)
{
- struct inode_switch_wbs_context *isw =
- container_of(work, struct inode_switch_wbs_context, work);
- struct inode *inode = isw->inode;
- struct backing_dev_info *bdi = inode_to_bdi(inode);
struct address_space *mapping = inode->i_mapping;
- struct bdi_writeback *old_wb = inode->i_wb;
- struct bdi_writeback *new_wb = isw->new_wb;
XA_STATE(xas, &mapping->i_pages, 0);
struct page *page;
bool switched = false;
- /*
- * If @inode switches cgwb membership while sync_inodes_sb() is
- * being issued, sync_inodes_sb() might miss it. Synchronize.
- */
- down_read(&bdi->wb_switch_rwsem);
-
- /*
- * By the time control reaches here, RCU grace period has passed
- * since I_WB_SWITCH assertion and all wb stat update transactions
- * between unlocked_inode_to_wb_begin/end() are guaranteed to be
- * synchronizing against the i_pages lock.
- *
- * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
- * gives us exclusion against all wb related operations on @inode
- * including IO list manipulations and stat updates.
- */
- if (old_wb < new_wb) {
- spin_lock(&old_wb->list_lock);
- spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
- } else {
- spin_lock(&new_wb->list_lock);
- spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
- }
spin_lock(&inode->i_lock);
xa_lock_irq(&mapping->i_pages);
/*
- * Once I_FREEING is visible under i_lock, the eviction path owns
- * the inode and we shouldn't modify ->i_io_list.
+ * Once I_FREEING or I_WILL_FREE are visible under i_lock, the eviction
+ * path owns the inode and we shouldn't modify ->i_io_list.
*/
- if (unlikely(inode->i_state & I_FREEING))
+ if (unlikely(inode->i_state & (I_FREEING | I_WILL_FREE)))
goto skip_switch;
trace_inode_switch_wbs(inode, old_wb, new_wb);
@@ -419,21 +409,28 @@ static void inode_switch_wbs_work_fn(struct work_struct *work)
wb_get(new_wb);
/*
- * Transfer to @new_wb's IO list if necessary. The specific list
- * @inode was on is ignored and the inode is put on ->b_dirty which
- * is always correct including from ->b_dirty_time. The transfer
- * preserves @inode->dirtied_when ordering.
+ * Transfer to @new_wb's IO list if necessary. If the @inode is dirty,
+ * the specific list @inode was on is ignored and the @inode is put on
+ * ->b_dirty which is always correct including from ->b_dirty_time.
+ * The transfer preserves @inode->dirtied_when ordering. If the @inode
+ * was clean, it means it was on the b_attached list, so move it onto
+ * the b_attached list of @new_wb.
*/
if (!list_empty(&inode->i_io_list)) {
- struct inode *pos;
-
- inode_io_list_del_locked(inode, old_wb);
inode->i_wb = new_wb;
- list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
- if (time_after_eq(inode->dirtied_when,
- pos->dirtied_when))
- break;
- inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
+
+ if (inode->i_state & I_DIRTY_ALL) {
+ struct inode *pos;
+
+ list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
+ if (time_after_eq(inode->dirtied_when,
+ pos->dirtied_when))
+ break;
+ inode_io_list_move_locked(inode, new_wb,
+ pos->i_io_list.prev);
+ } else {
+ inode_cgwb_move_to_attached(inode, new_wb);
+ }
} else {
inode->i_wb = new_wb;
}
@@ -452,31 +449,91 @@ skip_switch:
xa_unlock_irq(&mapping->i_pages);
spin_unlock(&inode->i_lock);
+
+ return switched;
+}
+
+static void inode_switch_wbs_work_fn(struct work_struct *work)
+{
+ struct inode_switch_wbs_context *isw =
+ container_of(to_rcu_work(work), struct inode_switch_wbs_context, work);
+ struct backing_dev_info *bdi = inode_to_bdi(isw->inodes[0]);
+ struct bdi_writeback *old_wb = isw->inodes[0]->i_wb;
+ struct bdi_writeback *new_wb = isw->new_wb;
+ unsigned long nr_switched = 0;
+ struct inode **inodep;
+
+ /*
+ * If @inode switches cgwb membership while sync_inodes_sb() is
+ * being issued, sync_inodes_sb() might miss it. Synchronize.
+ */
+ down_read(&bdi->wb_switch_rwsem);
+
+ /*
+ * By the time control reaches here, RCU grace period has passed
+ * since I_WB_SWITCH assertion and all wb stat update transactions
+ * between unlocked_inode_to_wb_begin/end() are guaranteed to be
+ * synchronizing against the i_pages lock.
+ *
+ * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
+ * gives us exclusion against all wb related operations on @inode
+ * including IO list manipulations and stat updates.
+ */
+ if (old_wb < new_wb) {
+ spin_lock(&old_wb->list_lock);
+ spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&new_wb->list_lock);
+ spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
+ }
+
+ for (inodep = isw->inodes; *inodep; inodep++) {
+ WARN_ON_ONCE((*inodep)->i_wb != old_wb);
+ if (inode_do_switch_wbs(*inodep, old_wb, new_wb))
+ nr_switched++;
+ }
+
spin_unlock(&new_wb->list_lock);
spin_unlock(&old_wb->list_lock);
up_read(&bdi->wb_switch_rwsem);
- if (switched) {
+ if (nr_switched) {
wb_wakeup(new_wb);
- wb_put(old_wb);
+ wb_put_many(old_wb, nr_switched);
}
- wb_put(new_wb);
- iput(inode);
+ for (inodep = isw->inodes; *inodep; inodep++)
+ iput(*inodep);
+ wb_put(new_wb);
kfree(isw);
-
atomic_dec(&isw_nr_in_flight);
}
-static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
+static bool inode_prepare_wbs_switch(struct inode *inode,
+ struct bdi_writeback *new_wb)
{
- struct inode_switch_wbs_context *isw = container_of(rcu_head,
- struct inode_switch_wbs_context, rcu_head);
+ /*
+ * Paired with smp_mb() in cgroup_writeback_umount().
+ * isw_nr_in_flight must be increased before checking SB_ACTIVE and
+ * grabbing an inode, otherwise isw_nr_in_flight can be observed as 0
+ * in cgroup_writeback_umount() and the isw_wq will be not flushed.
+ */
+ smp_mb();
+
+ /* while holding I_WB_SWITCH, no one else can update the association */
+ spin_lock(&inode->i_lock);
+ if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
+ inode->i_state & (I_WB_SWITCH | I_FREEING | I_WILL_FREE) ||
+ inode_to_wb(inode) == new_wb) {
+ spin_unlock(&inode->i_lock);
+ return false;
+ }
+ inode->i_state |= I_WB_SWITCH;
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
- /* needs to grab bh-unsafe locks, bounce to work item */
- INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
- queue_work(isw_wq, &isw->work);
+ return true;
}
/**
@@ -501,10 +558,12 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
return;
- isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
+ isw = kzalloc(sizeof(*isw) + 2 * sizeof(struct inode *), GFP_ATOMIC);
if (!isw)
return;
+ atomic_inc(&isw_nr_in_flight);
+
/* find and pin the new wb */
rcu_read_lock();
memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
@@ -514,19 +573,10 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
if (!isw->new_wb)
goto out_free;
- /* while holding I_WB_SWITCH, no one else can update the association */
- spin_lock(&inode->i_lock);
- if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
- inode->i_state & (I_WB_SWITCH | I_FREEING) ||
- inode_to_wb(inode) == isw->new_wb) {
- spin_unlock(&inode->i_lock);
+ if (!inode_prepare_wbs_switch(inode, isw->new_wb))
goto out_free;
- }
- inode->i_state |= I_WB_SWITCH;
- __iget(inode);
- spin_unlock(&inode->i_lock);
- isw->inode = inode;
+ isw->inodes[0] = inode;
/*
* In addition to synchronizing among switchers, I_WB_SWITCH tells
@@ -534,18 +584,85 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
* lock so that stat transfer can synchronize against them.
* Let's continue after I_WB_SWITCH is guaranteed to be visible.
*/
- call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
-
- atomic_inc(&isw_nr_in_flight);
+ INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn);
+ queue_rcu_work(isw_wq, &isw->work);
return;
out_free:
+ atomic_dec(&isw_nr_in_flight);
if (isw->new_wb)
wb_put(isw->new_wb);
kfree(isw);
}
/**
+ * cleanup_offline_cgwb - detach associated inodes
+ * @wb: target wb
+ *
+ * Switch all inodes attached to @wb to a nearest living ancestor's wb in order
+ * to eventually release the dying @wb. Returns %true if not all inodes were
+ * switched and the function has to be restarted.
+ */
+bool cleanup_offline_cgwb(struct bdi_writeback *wb)
+{
+ struct cgroup_subsys_state *memcg_css;
+ struct inode_switch_wbs_context *isw;
+ struct inode *inode;
+ int nr;
+ bool restart = false;
+
+ isw = kzalloc(sizeof(*isw) + WB_MAX_INODES_PER_ISW *
+ sizeof(struct inode *), GFP_KERNEL);
+ if (!isw)
+ return restart;
+
+ atomic_inc(&isw_nr_in_flight);
+
+ for (memcg_css = wb->memcg_css->parent; memcg_css;
+ memcg_css = memcg_css->parent) {
+ isw->new_wb = wb_get_create(wb->bdi, memcg_css, GFP_KERNEL);
+ if (isw->new_wb)
+ break;
+ }
+ if (unlikely(!isw->new_wb))
+ isw->new_wb = &wb->bdi->wb; /* wb_get() is noop for bdi's wb */
+
+ nr = 0;
+ spin_lock(&wb->list_lock);
+ list_for_each_entry(inode, &wb->b_attached, i_io_list) {
+ if (!inode_prepare_wbs_switch(inode, isw->new_wb))
+ continue;
+
+ isw->inodes[nr++] = inode;
+
+ if (nr >= WB_MAX_INODES_PER_ISW - 1) {
+ restart = true;
+ break;
+ }
+ }
+ spin_unlock(&wb->list_lock);
+
+ /* no attached inodes? bail out */
+ if (nr == 0) {
+ atomic_dec(&isw_nr_in_flight);
+ wb_put(isw->new_wb);
+ kfree(isw);
+ return restart;
+ }
+
+ /*
+ * In addition to synchronizing among switchers, I_WB_SWITCH tells
+ * the RCU protected stat update paths to grab the i_page
+ * lock so that stat transfer can synchronize against them.
+ * Let's continue after I_WB_SWITCH is guaranteed to be visible.
+ */
+ INIT_RCU_WORK(&isw->work, inode_switch_wbs_work_fn);
+ queue_rcu_work(isw_wq, &isw->work);
+
+ return restart;
+}
+
+/**
* wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
* @wbc: writeback_control of interest
* @inode: target inode
@@ -1000,6 +1117,12 @@ out_bdi_put:
*/
void cgroup_writeback_umount(void)
{
+ /*
+ * SB_ACTIVE should be reliably cleared before checking
+ * isw_nr_in_flight, see generic_shutdown_super().
+ */
+ smp_mb();
+
if (atomic_read(&isw_nr_in_flight)) {
/*
* Use rcu_barrier() to wait for all pending callbacks to
@@ -1024,6 +1147,17 @@ fs_initcall(cgroup_writeback_init);
static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
+static void inode_cgwb_move_to_attached(struct inode *inode,
+ struct bdi_writeback *wb)
+{
+ assert_spin_locked(&wb->list_lock);
+ assert_spin_locked(&inode->i_lock);
+
+ inode->i_state &= ~I_SYNC_QUEUED;
+ list_del_init(&inode->i_io_list);
+ wb_io_lists_depopulated(wb);
+}
+
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
__releases(&inode->i_lock)
@@ -1124,7 +1258,11 @@ void inode_io_list_del(struct inode *inode)
wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
- inode_io_list_del_locked(inode, wb);
+
+ inode->i_state &= ~I_SYNC_QUEUED;
+ list_del_init(&inode->i_io_list);
+ wb_io_lists_depopulated(wb);
+
spin_unlock(&inode->i_lock);
spin_unlock(&wb->list_lock);
}
@@ -1437,7 +1575,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
inode->i_state &= ~I_SYNC_QUEUED;
} else {
/* The inode is clean. Remove from writeback lists. */
- inode_io_list_del_locked(inode, wb);
+ inode_cgwb_move_to_attached(inode, wb);
}
}
@@ -1589,7 +1727,7 @@ static int writeback_single_inode(struct inode *inode,
* responsible for the writeback lists.
*/
if (!(inode->i_state & I_DIRTY_ALL))
- inode_io_list_del_locked(inode, wb);
+ inode_cgwb_move_to_attached(inode, wb);
spin_unlock(&wb->list_lock);
inode_sync_complete(inode);
out:
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index ff99ab2a3c43..fb733eb5aead 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -9,6 +9,7 @@
#include <linux/delay.h>
#include <linux/dax.h>
#include <linux/uio.h>
+#include <linux/pagemap.h>
#include <linux/pfn_t.h>
#include <linux/iomap.h>
#include <linux/interval_tree.h>
@@ -1329,7 +1330,7 @@ bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
static const struct address_space_operations fuse_dax_file_aops = {
.writepages = fuse_dax_writepages,
.direct_IO = noop_direct_IO,
- .set_page_dirty = noop_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_no_writeback,
.invalidatepage = noop_invalidatepage,
};
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 23b5be3db044..81d8f064126e 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -784,7 +784,7 @@ static const struct address_space_operations gfs2_aops = {
.writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readahead = gfs2_readahead,
- .set_page_dirty = iomap_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_nobuffers,
.releasepage = iomap_releasepage,
.invalidatepage = iomap_invalidatepage,
.bmap = gfs2_bmap,
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index d68184ebbfdd..7c9619997355 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -89,11 +89,13 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
}
const struct address_space_operations gfs2_meta_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.writepage = gfs2_aspace_writepage,
.releasepage = gfs2_releasepage,
};
const struct address_space_operations gfs2_rgrp_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.writepage = gfs2_aspace_writepage,
.releasepage = gfs2_releasepage,
};
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 3fc5cb346586..4a95a92546a0 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -159,6 +159,7 @@ static int hfs_writepages(struct address_space *mapping,
}
const struct address_space_operations hfs_btree_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = hfs_readpage,
.writepage = hfs_writepage,
.write_begin = hfs_write_begin,
@@ -168,6 +169,7 @@ const struct address_space_operations hfs_btree_aops = {
};
const struct address_space_operations hfs_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = hfs_readpage,
.writepage = hfs_writepage,
.write_begin = hfs_write_begin,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 8ea447e5c470..70e8374ddac4 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -156,6 +156,7 @@ static int hfsplus_writepages(struct address_space *mapping,
}
const struct address_space_operations hfsplus_btree_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = hfsplus_readpage,
.writepage = hfsplus_writepage,
.write_begin = hfsplus_write_begin,
@@ -165,6 +166,7 @@ const struct address_space_operations hfsplus_btree_aops = {
};
const struct address_space_operations hfsplus_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = hfsplus_readpage,
.writepage = hfsplus_writepage,
.write_begin = hfsplus_write_begin,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 077c25128eb7..c3a49aacf20a 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -196,6 +196,7 @@ static int hpfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
}
const struct address_space_operations hpfs_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = hpfs_readpage,
.writepage = hpfs_writepage,
.readahead = hpfs_readahead,
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 9023717c5188..0065781935c7 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -640,31 +640,6 @@ out_no_page:
return status;
}
-int
-iomap_set_page_dirty(struct page *page)
-{
- struct address_space *mapping = page_mapping(page);
- int newly_dirty;
-
- if (unlikely(!mapping))
- return !TestSetPageDirty(page);
-
- /*
- * Lock out page's memcg migration to keep PageDirty
- * synchronized with per-memcg dirty page counters.
- */
- lock_page_memcg(page);
- newly_dirty = !TestSetPageDirty(page);
- if (newly_dirty)
- __set_page_dirty(page, mapping, 0);
- unlock_page_memcg(page);
-
- if (newly_dirty)
- __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
- return newly_dirty;
-}
-EXPORT_SYMBOL_GPL(iomap_set_page_dirty);
-
static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
size_t copied, struct page *page)
{
@@ -684,7 +659,7 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
if (unlikely(copied < len && !PageUptodate(page)))
return 0;
iomap_set_range_uptodate(page, offset_in_page(pos), len);
- iomap_set_page_dirty(page);
+ __set_page_dirty_nobuffers(page);
return copied;
}
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6f65bfa9f18d..3663dd5a23bc 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -356,6 +356,7 @@ static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
}
const struct address_space_operations jfs_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = jfs_readpage,
.readahead = jfs_readahead,
.writepage = jfs_writepage,
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index d73950fc3d57..26f2aa3586f9 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -17,12 +17,6 @@
#include "kernfs-internal.h"
-static const struct address_space_operations kernfs_aops = {
- .readpage = simple_readpage,
- .write_begin = simple_write_begin,
- .write_end = simple_write_end,
-};
-
static const struct inode_operations kernfs_iops = {
.permission = kernfs_iop_permission,
.setattr = kernfs_iop_setattr,
@@ -203,7 +197,7 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
{
kernfs_get(kn);
inode->i_private = kn;
- inode->i_mapping->a_ops = &kernfs_aops;
+ inode->i_mapping->a_ops = &ram_aops;
inode->i_op = &kernfs_iops;
inode->i_generation = kernfs_gen(kn);
diff --git a/fs/libfs.c b/fs/libfs.c
index e9b29c6ffccb..51b4de3b3447 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -512,7 +512,7 @@ int simple_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
}
EXPORT_SYMBOL(simple_setattr);
-int simple_readpage(struct file *file, struct page *page)
+static int simple_readpage(struct file *file, struct page *page)
{
clear_highpage(page);
flush_dcache_page(page);
@@ -520,7 +520,6 @@ int simple_readpage(struct file *file, struct page *page)
unlock_page(page);
return 0;
}
-EXPORT_SYMBOL(simple_readpage);
int simple_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
@@ -568,7 +567,7 @@ EXPORT_SYMBOL(simple_write_begin);
*
* Use *ONLY* with simple_readpage()
*/
-int simple_write_end(struct file *file, struct address_space *mapping,
+static int simple_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
@@ -597,7 +596,17 @@ int simple_write_end(struct file *file, struct address_space *mapping,
return copied;
}
-EXPORT_SYMBOL(simple_write_end);
+
+/*
+ * Provides ramfs-style behavior: data in the pagecache, but no writeback.
+ */
+const struct address_space_operations ram_aops = {
+ .readpage = simple_readpage,
+ .write_begin = simple_write_begin,
+ .write_end = simple_write_end,
+ .set_page_dirty = __set_page_dirty_no_writeback,
+};
+EXPORT_SYMBOL(ram_aops);
/*
* the inodes created here are not hashed. If you use iunique to generate
@@ -1162,22 +1171,6 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL(noop_fsync);
-int noop_set_page_dirty(struct page *page)
-{
- /*
- * Unlike __set_page_dirty_no_writeback that handles dirty page
- * tracking in the page object, dax does all dirty tracking in
- * the inode address_space in response to mkwrite faults. In the
- * dax case we only need to worry about potentially dirty CPU
- * caches, not dirty page cache pages to write back.
- *
- * This callback is defined to prevent fallback to
- * __set_page_dirty_buffers() in set_page_dirty().
- */
- return 0;
-}
-EXPORT_SYMBOL_GPL(noop_set_page_dirty);
-
void noop_invalidatepage(struct page *page, unsigned int offset,
unsigned int length)
{
@@ -1208,19 +1201,10 @@ void kfree_link(void *p)
}
EXPORT_SYMBOL(kfree_link);
-/*
- * nop .set_page_dirty method so that people can use .page_mkwrite on
- * anon inodes.
- */
-static int anon_set_page_dirty(struct page *page)
-{
- return 0;
-};
-
struct inode *alloc_anon_inode(struct super_block *s)
{
static const struct address_space_operations anon_aops = {
- .set_page_dirty = anon_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_no_writeback,
};
struct inode *inode = new_inode_pseudo(s);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index a532a99bbe81..a71f1cf894b9 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -442,6 +442,7 @@ static sector_t minix_bmap(struct address_space *mapping, sector_t block)
}
static const struct address_space_operations minix_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = minix_readpage,
.writepage = minix_writepage,
.write_begin = minix_write_begin,
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index c0361ce45f62..97769fe4d588 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -434,6 +434,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
static const struct address_space_operations def_mdt_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.writepage = nilfs_mdt_write_page,
};
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index f5c058b3192c..4474adb393ca 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -477,7 +477,7 @@ err_corrupt_attr:
}
file_name_attr = (FILE_NAME_ATTR*)((u8*)attr +
le16_to_cpu(attr->data.resident.value_offset));
- p2 = (u8*)attr + le32_to_cpu(attr->data.resident.value_length);
+ p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
if (p2 < (u8*)attr || p2 > p)
goto err_corrupt_attr;
/* This attribute is ok, but is it in the $Extend directory? */
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1294925ac94a..68d11c295dd3 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -632,8 +632,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
}
if (PageUptodate(page)) {
- if (!buffer_uptodate(bh))
- set_buffer_uptodate(bh);
+ set_buffer_uptodate(bh);
} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_new(bh) &&
ocfs2_should_read_blk(inode, page, block_start) &&
@@ -2454,6 +2453,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
}
const struct address_space_operations ocfs2_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = ocfs2_readpage,
.readahead = ocfs2_readahead,
.writepage = ocfs2_writepage,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index e829c2595543..f89ffcbd585f 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1442,8 +1442,6 @@ void o2hb_init(void)
for (i = 0; i < ARRAY_SIZE(o2hb_live_slots); i++)
INIT_LIST_HEAD(&o2hb_live_slots[i]);
- INIT_LIST_HEAD(&o2hb_node_events);
-
memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap));
memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap));
@@ -1598,12 +1596,13 @@ static ssize_t o2hb_region_start_block_store(struct config_item *item,
struct o2hb_region *reg = to_o2hb_region(item);
unsigned long long tmp;
char *p = (char *)page;
+ ssize_t ret;
if (reg->hr_bdev)
return -EINVAL;
- tmp = simple_strtoull(p, &p, 0);
- if (!p || (*p && (*p != '\n')))
+ ret = kstrtoull(p, 0, &tmp);
+ if (ret)
return -EINVAL;
reg->hr_start_block = tmp;
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index bb82e6b1ff4e..625c92521416 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -824,7 +824,7 @@ static void __exit exit_o2nm(void)
static int __init init_o2nm(void)
{
- int ret = -1;
+ int ret;
o2hb_init();
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4960a6de768d..9b88219febb5 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2977,7 +2977,7 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
enum dlm_lockres_list idx;
- struct list_head *queue = &res->granted;
+ struct list_head *queue;
struct dlm_lock *lock;
int noderef;
u8 nodenum = O2NM_MAX_NODES;
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 90b8d300c1ee..de56e6231af8 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -326,11 +326,7 @@ static ssize_t ocfs2_filecheck_attr_show(struct kobject *kobj,
ret = snprintf(buf + total, remain, "%lu\t\t%u\t%s\n",
p->fe_ino, p->fe_done,
ocfs2_filecheck_error(p->fe_status));
- if (ret < 0) {
- total = ret;
- break;
- }
- if (ret == remain) {
+ if (ret >= remain) {
/* snprintf() didn't fit */
total = -E2BIG;
break;
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index d50e8b8dfea4..16f1bfc407f2 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -500,11 +500,7 @@ static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj,
list_for_each_entry(p, &ocfs2_stack_list, sp_list) {
ret = snprintf(buf, remain, "%s\n",
p->sp_name);
- if (ret < 0) {
- total = ret;
- break;
- }
- if (ret == remain) {
+ if (ret >= remain) {
/* snprintf() didn't fit */
total = -E2BIG;
break;
@@ -531,7 +527,7 @@ static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj,
if (active_stack) {
ret = snprintf(buf, PAGE_SIZE, "%s\n",
active_stack->sp_name);
- if (ret == PAGE_SIZE)
+ if (ret >= PAGE_SIZE)
ret = -E2BIG;
}
spin_unlock(&ocfs2_stack_lock);
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 11e733aab25d..89725b15a64b 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -372,6 +372,7 @@ const struct inode_operations omfs_file_inops = {
};
const struct address_space_operations omfs_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = omfs_readpage,
.readahead = omfs_readahead,
.writepage = omfs_writepage,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fc9784544b24..66965ad88d8b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1047,7 +1047,7 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
return false;
if (!is_cow_mapping(vma->vm_flags))
return false;
- if (likely(!atomic_read(&vma->vm_mm->has_pinned)))
+ if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
return false;
page = vm_normal_page(vma, addr, pte);
if (!page)
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 9ebd17d7befb..65e7e56005b8 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -53,13 +53,6 @@ struct ramfs_fs_info {
static const struct super_operations ramfs_ops;
static const struct inode_operations ramfs_dir_inode_operations;
-static const struct address_space_operations ramfs_aops = {
- .readpage = simple_readpage,
- .write_begin = simple_write_begin,
- .write_end = simple_write_end,
- .set_page_dirty = __set_page_dirty_no_writeback,
-};
-
struct inode *ramfs_get_inode(struct super_block *sb,
const struct inode *dir, umode_t mode, dev_t dev)
{
@@ -68,7 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
if (inode) {
inode->i_ino = get_next_ino();
inode_init_owner(&init_user_ns, inode, dir, mode);
- inode->i_mapping->a_ops = &ramfs_aops;
+ inode->i_mapping->a_ops = &ram_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
mapping_set_unevictable(inode->i_mapping);
inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index b9e87ebb1060..855f0e87066d 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -226,8 +226,11 @@ out_free_bio:
bio_free_pages(bio);
bio_put(bio);
out:
- if (res < 0)
+ if (res < 0) {
ERROR("Failed to read block 0x%llx: %d\n", index, res);
+ if (msblk->panic_on_errors)
+ panic("squashfs read failed");
+ }
return res;
}
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h
index 166e98806265..1e90c2575f9b 100644
--- a/fs/squashfs/squashfs_fs_sb.h
+++ b/fs/squashfs/squashfs_fs_sb.h
@@ -65,5 +65,6 @@ struct squashfs_sb_info {
unsigned int fragments;
int xattr_ids;
unsigned int ids;
+ bool panic_on_errors;
};
#endif
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 88cc94be1076..60d6951915f4 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -18,9 +18,11 @@
#include <linux/fs.h>
#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
#include <linux/vfs.h>
#include <linux/slab.h>
#include <linux/mutex.h>
+#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -37,6 +39,51 @@
static struct file_system_type squashfs_fs_type;
static const struct super_operations squashfs_super_ops;
+enum Opt_errors {
+ Opt_errors_continue,
+ Opt_errors_panic,
+};
+
+enum squashfs_param {
+ Opt_errors,
+};
+
+struct squashfs_mount_opts {
+ enum Opt_errors errors;
+};
+
+static const struct constant_table squashfs_param_errors[] = {
+ {"continue", Opt_errors_continue },
+ {"panic", Opt_errors_panic },
+ {}
+};
+
+static const struct fs_parameter_spec squashfs_fs_parameters[] = {
+ fsparam_enum("errors", Opt_errors, squashfs_param_errors),
+ {}
+};
+
+static int squashfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+ struct squashfs_mount_opts *opts = fc->fs_private;
+ struct fs_parse_result result;
+ int opt;
+
+ opt = fs_parse(fc, squashfs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_errors:
+ opts->errors = result.uint_32;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static const struct squashfs_decompressor *supported_squashfs_filesystem(
struct fs_context *fc,
short major, short minor, short id)
@@ -67,6 +114,7 @@ static const struct squashfs_decompressor *supported_squashfs_filesystem(
static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
+ struct squashfs_mount_opts *opts = fc->fs_private;
struct squashfs_sb_info *msblk;
struct squashfs_super_block *sblk = NULL;
struct inode *root;
@@ -85,6 +133,8 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc)
}
msblk = sb->s_fs_info;
+ msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
+
msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
msblk->devblksize_log2 = ffz(~msblk->devblksize);
@@ -350,18 +400,52 @@ static int squashfs_get_tree(struct fs_context *fc)
static int squashfs_reconfigure(struct fs_context *fc)
{
+ struct super_block *sb = fc->root->d_sb;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+ struct squashfs_mount_opts *opts = fc->fs_private;
+
sync_filesystem(fc->root->d_sb);
fc->sb_flags |= SB_RDONLY;
+
+ msblk->panic_on_errors = (opts->errors == Opt_errors_panic);
+
return 0;
}
+static void squashfs_free_fs_context(struct fs_context *fc)
+{
+ kfree(fc->fs_private);
+}
+
static const struct fs_context_operations squashfs_context_ops = {
.get_tree = squashfs_get_tree,
+ .free = squashfs_free_fs_context,
+ .parse_param = squashfs_parse_param,
.reconfigure = squashfs_reconfigure,
};
+static int squashfs_show_options(struct seq_file *s, struct dentry *root)
+{
+ struct super_block *sb = root->d_sb;
+ struct squashfs_sb_info *msblk = sb->s_fs_info;
+
+ if (msblk->panic_on_errors)
+ seq_puts(s, ",errors=panic");
+ else
+ seq_puts(s, ",errors=continue");
+
+ return 0;
+}
+
static int squashfs_init_fs_context(struct fs_context *fc)
{
+ struct squashfs_mount_opts *opts;
+
+ opts = kzalloc(sizeof(*opts), GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+
+ fc->fs_private = opts;
fc->ops = &squashfs_context_ops;
return 0;
}
@@ -481,6 +565,7 @@ static struct file_system_type squashfs_fs_type = {
.owner = THIS_MODULE,
.name = "squashfs",
.init_fs_context = squashfs_init_fs_context,
+ .parameters = squashfs_fs_parameters,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV
};
@@ -491,6 +576,7 @@ static const struct super_operations squashfs_super_ops = {
.free_inode = squashfs_free_inode,
.statfs = squashfs_statfs,
.put_super = squashfs_put_super,
+ .show_options = squashfs_show_options,
};
module_init(init_squashfs_fs);
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 8b2e99b7bc9f..749385015a8d 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -495,6 +495,7 @@ static sector_t sysv_bmap(struct address_space *mapping, sector_t block)
}
const struct address_space_operations sysv_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = sysv_readpage,
.writepage = sysv_writepage,
.write_begin = sysv_write_begin,
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 2846dcd92197..1baff8ddb754 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -125,6 +125,7 @@ static int udf_adinicb_write_end(struct file *file, struct address_space *mappin
}
const struct address_space_operations udf_adinicb_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = udf_adinicb_readpage,
.writepage = udf_adinicb_writepage,
.write_begin = udf_adinicb_write_begin,
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 0dd2f93ac048..4917670860a0 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -235,6 +235,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block)
}
const struct address_space_operations udf_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = udf_readpage,
.readahead = udf_readahead,
.writepage = udf_writepage,
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index debc282c1bb4..ac628de69601 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -526,6 +526,7 @@ static sector_t ufs_bmap(struct address_space *mapping, sector_t block)
}
const struct address_space_operations ufs_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
.readpage = ufs_readpage,
.writepage = ufs_writepage,
.write_begin = ufs_write_begin,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 826caa6b4a5a..cb4e0fcf4c76 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -561,7 +561,7 @@ const struct address_space_operations xfs_address_space_operations = {
.readahead = xfs_vm_readahead,
.writepage = xfs_vm_writepage,
.writepages = xfs_vm_writepages,
- .set_page_dirty = iomap_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_nobuffers,
.releasepage = iomap_releasepage,
.invalidatepage = iomap_invalidatepage,
.bmap = xfs_vm_bmap,
@@ -575,7 +575,7 @@ const struct address_space_operations xfs_address_space_operations = {
const struct address_space_operations xfs_dax_aops = {
.writepages = xfs_dax_writepages,
.direct_IO = noop_direct_IO,
- .set_page_dirty = noop_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_no_writeback,
.invalidatepage = noop_invalidatepage,
.swap_activate = xfs_iomap_swapfile_activate,
};
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index cd145d318b17..dbf03635869c 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -5,7 +5,7 @@
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
*/
#include <linux/module.h>
-#include <linux/fs.h>
+#include <linux/pagemap.h>
#include <linux/magic.h>
#include <linux/iomap.h>
#include <linux/init.h>
@@ -185,7 +185,7 @@ static const struct address_space_operations zonefs_file_aops = {
.readahead = zonefs_readahead,
.writepage = zonefs_writepage,
.writepages = zonefs_writepages,
- .set_page_dirty = iomap_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_nobuffers,
.releasepage = iomap_releasepage,
.invalidatepage = iomap_invalidatepage,
.migratepage = iomap_migrate_page,