summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/ext4/acl.h 5
-rw-r--r-- fs/ext4/ext4.h 9
-rw-r--r-- fs/ext4/extents.c 3
-rw-r--r-- fs/ext4/file.c 5
-rw-r--r-- fs/ext4/inode.c 11
-rw-r--r-- fs/ext4/ioctl.c 3
-rw-r--r-- fs/ext4/mballoc-test.c 76
-rw-r--r-- fs/ext4/mballoc.c 322
-rw-r--r-- fs/ext4/mballoc.h 14
-rw-r--r-- fs/ext4/move_extent.c 4
-rw-r--r-- fs/ext4/namei.c 2
-rw-r--r-- fs/ext4/page-io.c 3
-rw-r--r-- fs/ext4/readpage.c 1
-rw-r--r-- fs/ext4/super.c 36
-rw-r--r-- fs/ext4/sysfs.c 174
-rw-r--r-- fs/ext4/xattr.c 145
-rw-r--r-- fs/ioctl.c 3
-rw-r--r-- fs/iomap/Makefile 2
-rw-r--r-- fs/jbd2/checkpoint.c 24
-rw-r--r-- fs/jbd2/commit.c 3
-rw-r--r-- fs/lockd/host.c 1
-rw-r--r-- fs/nfsd/export.c 16
-rw-r--r-- fs/nfsd/netlink.c 66
-rw-r--r-- fs/nfsd/netlink.h 10
-rw-r--r-- fs/nfsd/netns.h 1
-rw-r--r-- fs/nfsd/nfs4callback.c 31
-rw-r--r-- fs/nfsd/nfs4proc.c 79
-rw-r--r-- fs/nfsd/nfs4state.c 188
-rw-r--r-- fs/nfsd/nfs4xdr.c 83
-rw-r--r-- fs/nfsd/nfsctl.c 526
-rw-r--r-- fs/nfsd/nfsd.h 3
-rw-r--r-- fs/nfsd/nfsfh.c 4
-rw-r--r-- fs/nfsd/nfssvc.c 11
-rw-r--r-- fs/nfsd/state.h 6
-rw-r--r-- fs/nfsd/stats.c 42
-rw-r--r-- fs/nfsd/stats.h 5
-rw-r--r-- fs/nfsd/trace.h 100
-rw-r--r-- fs/nfsd/vfs.c 2
-rw-r--r-- fs/nfsd/vfs.h 8
-rw-r--r-- fs/nfsd/xdr4.h 24
-rw-r--r-- fs/ocfs2/cluster/tcp.c 5
-rw-r--r-- fs/proc/proc_sysctl.c 21
-rw-r--r-- fs/tracefs/event_inode.c 7
-rw-r--r-- fs/unicode/Makefile 14
-rw-r--r-- fs/xfs/Makefile 4
45 files changed, 1514 insertions, 588 deletions
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index ef4c19e5f570..0c5a79c3b5d4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -68,11 +68,6 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
static inline int
ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
{
- /* usually, the umask is applied by posix_acl_create(), but if
- ext4 ACL support is disabled at compile time, we need to do
- it here, because posix_acl_create() will never be called */
- inode->i_mode &= ~current_umask();
-
return 0;
}
#endif /* CONFIG_EXT4_FS_POSIX_ACL */
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8d126654019e..983dad8c07ec 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -213,11 +213,14 @@ enum criteria {
#define EXT4_MB_USE_RESERVED 0x2000
/* Do strict check for free blocks while retrying block allocation */
#define EXT4_MB_STRICT_CHECK 0x4000
-/* Large fragment size list lookup succeeded at least once for cr = 0 */
+/* Large fragment size list lookup succeeded at least once for
+ * CR_POWER2_ALIGNED */
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
-/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
+/* Avg fragment size rb tree lookup succeeded at least once for
+ * CR_GOAL_LEN_FAST */
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
-/* Avg fragment size rb tree lookup succeeded at least once for cr = 1.5 */
+/* Avg fragment size rb tree lookup succeeded at least once for
+ * CR_BEST_AVAIL_LEN */
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000
struct ext4_allocation_request {
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e57054bdc5fd..e067f2dd0335 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3402,9 +3402,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
struct ext4_extent *ex, *abut_ex;
ext4_lblk_t ee_block, eof_block;
unsigned int ee_len, depth, map_len = map->m_len;
- int allocated = 0, max_zeroout = 0;
int err = 0;
int split_flag = EXT4_EXT_DATA_VALID2;
+ int allocated = 0;
+ unsigned int max_zeroout = 0;
ext_debug(inode, "logical block %llu, max_blocks %u\n",
(unsigned long long)map->m_lblk, map_len);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 28c51b0cc4db..c89e434db6b7 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -844,8 +844,7 @@ static int ext4_sample_last_mounted(struct super_block *sb,
if (err)
goto out_journal;
lock_buffer(sbi->s_sbh);
- strncpy(sbi->s_es->s_last_mounted, cp,
- sizeof(sbi->s_es->s_last_mounted));
+ strtomem_pad(sbi->s_es->s_last_mounted, cp, 0);
ext4_superblock_csum_set(sb);
unlock_buffer(sbi->s_sbh);
ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
@@ -885,7 +884,7 @@ static int ext4_file_open(struct inode *inode, struct file *filp)
return ret;
}
- filp->f_mode |= FMODE_NOWAIT;
+ filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
return dquot_file_open(inode, filp);
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 537803250ca9..4bae9ccf5fe0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1865,7 +1865,7 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio)
len = folio_size(folio);
if (folio_pos(folio) + len > size &&
!ext4_verity_in_progress(mpd->inode))
- len = size & ~PAGE_MASK;
+ len = size & (len - 1);
err = ext4_bio_write_folio(&mpd->io_submit, folio, len);
if (!err)
mpd->wbc->nr_to_write--;
@@ -2334,7 +2334,7 @@ static int mpage_journal_page_buffers(handle_t *handle,
if (folio_pos(folio) + len > size &&
!ext4_verity_in_progress(inode))
- len = size - folio_pos(folio);
+ len = size & (len - 1);
return ext4_journal_folio_buffers(handle, folio, len);
}
@@ -2887,9 +2887,6 @@ retry:
if (IS_ERR(folio))
return PTR_ERR(folio);
- /* In case writeback began while the folio was unlocked */
- folio_wait_stable(folio);
-
#ifdef CONFIG_FS_ENCRYPTION
ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep);
#else
@@ -3530,7 +3527,6 @@ static const struct address_space_operations ext4_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_invalidate_folio,
.release_folio = ext4_release_folio,
- .direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
@@ -3547,7 +3543,6 @@ static const struct address_space_operations ext4_journalled_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_journalled_invalidate_folio,
.release_folio = ext4_release_folio,
- .direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio_norefs,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
@@ -3564,7 +3559,6 @@ static const struct address_space_operations ext4_da_aops = {
.bmap = ext4_bmap,
.invalidate_folio = ext4_invalidate_folio,
.release_folio = ext4_release_folio,
- .direct_IO = noop_direct_IO,
.migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
.error_remove_folio = generic_error_remove_folio,
@@ -3573,7 +3567,6 @@ static const struct address_space_operations ext4_da_aops = {
static const struct address_space_operations ext4_dax_aops = {
.writepages = ext4_dax_writepages,
- .direct_IO = noop_direct_IO,
.dirty_folio = noop_dirty_folio,
.bmap = ext4_bmap,
.swap_activate = ext4_iomap_swap_activate,
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 7160a71044c8..dab7acd49709 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -1150,9 +1150,8 @@ static int ext4_ioctl_getlabel(struct ext4_sb_info *sbi, char __user *user_label
*/
BUILD_BUG_ON(EXT4_LABEL_MAX >= FSLABEL_MAX);
- memset(label, 0, sizeof(label));
lock_buffer(sbi->s_sbh);
- strncpy(label, sbi->s_es->s_volume_name, EXT4_LABEL_MAX);
+ strscpy_pad(label, sbi->s_es->s_volume_name);
unlock_buffer(sbi->s_sbh);
if (copy_to_user(user_label, label, sizeof(label)))
diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index 044ca5238f41..bb2a223b207c 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -30,7 +30,31 @@ struct mbt_ext4_super_block {
#define MBT_CTX(_sb) (&MBT_SB(_sb)->mbt_ctx)
#define MBT_GRP_CTX(_sb, _group) (&MBT_CTX(_sb)->grp_ctx[_group])
+static struct inode *mbt_alloc_inode(struct super_block *sb)
+{
+ struct ext4_inode_info *ei;
+
+ ei = kmalloc(sizeof(struct ext4_inode_info), GFP_KERNEL);
+ if (!ei)
+ return NULL;
+
+ INIT_LIST_HEAD(&ei->i_orphan);
+ init_rwsem(&ei->xattr_sem);
+ init_rwsem(&ei->i_data_sem);
+ inode_init_once(&ei->vfs_inode);
+ ext4_fc_init_inode(&ei->vfs_inode);
+
+ return &ei->vfs_inode;
+}
+
+static void mbt_free_inode(struct inode *inode)
+{
+ kfree(EXT4_I(inode));
+}
+
static const struct super_operations mbt_sops = {
+ .alloc_inode = mbt_alloc_inode,
+ .free_inode = mbt_free_inode,
};
static void mbt_kill_sb(struct super_block *sb)
@@ -859,6 +883,56 @@ static void test_mb_free_blocks(struct kunit *test)
ext4_mb_unload_buddy(&e4b);
}
+#define COUNT_FOR_ESTIMATE 100000
+static void test_mb_mark_used_cost(struct kunit *test)
+{
+ struct ext4_buddy e4b;
+ struct super_block *sb = (struct super_block *)test->priv;
+ struct ext4_free_extent ex;
+ int ret;
+ struct test_range ranges[TEST_RANGE_COUNT];
+ int i, j;
+ unsigned long start, end, all = 0;
+
+ /* buddy cache assumes that each page contains at least one block */
+ if (sb->s_blocksize > PAGE_SIZE)
+ kunit_skip(test, "blocksize exceeds pagesize");
+
+ ret = ext4_mb_load_buddy(sb, TEST_GOAL_GROUP, &e4b);
+ KUNIT_ASSERT_EQ(test, ret, 0);
+
+ ex.fe_group = TEST_GOAL_GROUP;
+ for (j = 0; j < COUNT_FOR_ESTIMATE; j++) {
+ mbt_generate_test_ranges(sb, ranges, TEST_RANGE_COUNT);
+ start = jiffies;
+ for (i = 0; i < TEST_RANGE_COUNT; i++) {
+ if (ranges[i].len == 0)
+ continue;
+
+ ex.fe_start = ranges[i].start;
+ ex.fe_len = ranges[i].len;
+ ext4_lock_group(sb, TEST_GOAL_GROUP);
+ mb_mark_used(&e4b, &ex);
+ ext4_unlock_group(sb, TEST_GOAL_GROUP);
+ }
+ end = jiffies;
+ all += (end - start);
+
+ for (i = 0; i < TEST_RANGE_COUNT; i++) {
+ if (ranges[i].len == 0)
+ continue;
+
+ ext4_lock_group(sb, TEST_GOAL_GROUP);
+ mb_free_blocks(NULL, &e4b, ranges[i].start,
+ ranges[i].len);
+ ext4_unlock_group(sb, TEST_GOAL_GROUP);
+ }
+ }
+
+ kunit_info(test, "costed jiffies %lu\n", all);
+ ext4_mb_unload_buddy(&e4b);
+}
+
static const struct mbt_ext4_block_layout mbt_test_layouts[] = {
{
.blocksize_bits = 10,
@@ -901,6 +975,8 @@ static struct kunit_case mbt_test_cases[] = {
KUNIT_CASE_PARAM(test_mb_mark_used, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_mb_free_blocks, mbt_layouts_gen_params),
KUNIT_CASE_PARAM(test_mark_diskspace_used, mbt_layouts_gen_params),
+ KUNIT_CASE_PARAM_ATTR(test_mb_mark_used_cost, mbt_layouts_gen_params,
+ { .speed = KUNIT_SPEED_SLOW }),
{}
};
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 12b3f196010b..9dda9cd68ab2 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len)
return 0;
if (order == MB_NUM_ORDERS(sb))
order--;
+ if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb)))
+ order = MB_NUM_ORDERS(sb) - 1;
return order;
}
@@ -1008,6 +1010,8 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
* goal length.
*/
order = fls(ac->ac_g_ex.fe_len) - 1;
+ if (WARN_ON_ONCE(order - 1 > MB_NUM_ORDERS(ac->ac_sb)))
+ order = MB_NUM_ORDERS(ac->ac_sb);
min_order = order - sbi->s_mb_best_avail_max_trim_order;
if (min_order < 0)
min_order = 0;
@@ -1076,23 +1080,11 @@ static inline int should_optimize_scan(struct ext4_allocation_context *ac)
}
/*
- * Return next linear group for allocation. If linear traversal should not be
- * performed, this function just returns the same group
+ * Return next linear group for allocation.
*/
static ext4_group_t
-next_linear_group(struct ext4_allocation_context *ac, ext4_group_t group,
- ext4_group_t ngroups)
+next_linear_group(ext4_group_t group, ext4_group_t ngroups)
{
- if (!should_optimize_scan(ac))
- goto inc_and_return;
-
- if (ac->ac_groups_linear_remaining) {
- ac->ac_groups_linear_remaining--;
- goto inc_and_return;
- }
-
- return group;
-inc_and_return:
/*
* Artificially restricted ngroups for non-extent
* files makes group > ngroups possible on first loop.
@@ -1118,8 +1110,19 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
{
*new_cr = ac->ac_criteria;
- if (!should_optimize_scan(ac) || ac->ac_groups_linear_remaining) {
- *group = next_linear_group(ac, *group, ngroups);
+ if (!should_optimize_scan(ac)) {
+ *group = next_linear_group(*group, ngroups);
+ return;
+ }
+
+ /*
+ * Optimized scanning can return non-adjacent groups which can cause
+ * seek overhead for rotational disks. So try a few linear groups before
+ * trying optimized scan.
+ */
+ if (ac->ac_groups_linear_remaining) {
+ *group = next_linear_group(*group, ngroups);
+ ac->ac_groups_linear_remaining--;
return;
}
@@ -1131,8 +1134,9 @@ static void ext4_mb_choose_next_group(struct ext4_allocation_context *ac,
ext4_mb_choose_next_group_best_avail(ac, new_cr, group);
} else {
/*
- * TODO: For CR=2, we can arrange groups in an rb tree sorted by
- * bb_free. But until that happens, we should never come here.
+ * TODO: For CR_GOAL_LEN_SLOW, we can arrange groups in an
+ * rb tree sorted by bb_free. But until that happens, we should
+ * never come here.
*/
WARN_ON(1);
}
@@ -1270,7 +1274,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
* for this page; do not hold this lock when calling this routine!
*/
-static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
+static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
{
ext4_group_t ngroups;
unsigned int blocksize;
@@ -1288,13 +1292,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
char *bitmap;
struct ext4_group_info *grinfo;
- inode = page->mapping->host;
+ inode = folio->mapping->host;
sb = inode->i_sb;
ngroups = ext4_get_groups_count(sb);
blocksize = i_blocksize(inode);
blocks_per_page = PAGE_SIZE / blocksize;
- mb_debug(sb, "init page %lu\n", page->index);
+ mb_debug(sb, "init folio %lu\n", folio->index);
groups_per_page = blocks_per_page >> 1;
if (groups_per_page == 0)
@@ -1309,9 +1313,9 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
} else
bh = &bhs;
- first_group = page->index * blocks_per_page / 2;
+ first_group = folio->index * blocks_per_page / 2;
- /* read all groups the page covers into the cache */
+ /* read all groups the folio covers into the cache */
for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
if (group >= ngroups)
break;
@@ -1322,10 +1326,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
/*
* If page is uptodate then we came here after online resize
* which added some new uninitialized group info structs, so
- * we must skip all initialized uptodate buddies on the page,
+ * we must skip all initialized uptodate buddies on the folio,
* which may be currently in use by an allocating task.
*/
- if (PageUptodate(page) && !EXT4_MB_GRP_NEED_INIT(grinfo)) {
+ if (folio_test_uptodate(folio) &&
+ !EXT4_MB_GRP_NEED_INIT(grinfo)) {
bh[i] = NULL;
continue;
}
@@ -1349,7 +1354,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
err = err2;
}
- first_block = page->index * blocks_per_page;
+ first_block = folio->index * blocks_per_page;
for (i = 0; i < blocks_per_page; i++) {
group = (first_block + i) >> 1;
if (group >= ngroups)
@@ -1370,7 +1375,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
* above
*
*/
- data = page_address(page) + (i * blocksize);
+ data = folio_address(folio) + (i * blocksize);
bitmap = bh[group - first_group]->b_data;
/*
@@ -1385,8 +1390,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
if ((first_block + i) & 1) {
/* this is block of buddy */
BUG_ON(incore == NULL);
- mb_debug(sb, "put buddy for group %u in page %lu/%x\n",
- group, page->index, i * blocksize);
+ mb_debug(sb, "put buddy for group %u in folio %lu/%x\n",
+ group, folio->index, i * blocksize);
trace_ext4_mb_buddy_bitmap_load(sb, group);
grinfo->bb_fragments = 0;
memset(grinfo->bb_counters, 0,
@@ -1404,8 +1409,8 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
} else {
/* this is block of bitmap */
BUG_ON(incore != NULL);
- mb_debug(sb, "put bitmap for group %u in page %lu/%x\n",
- group, page->index, i * blocksize);
+ mb_debug(sb, "put bitmap for group %u in folio %lu/%x\n",
+ group, folio->index, i * blocksize);
trace_ext4_mb_bitmap_load(sb, group);
/* see comments in ext4_mb_put_pa() */
@@ -1423,7 +1428,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
incore = data;
}
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
out:
if (bh) {
@@ -1439,7 +1444,7 @@ out:
* Lock the buddy and bitmap pages. This make sure other parallel init_group
* on the same buddy page doesn't happen whild holding the buddy page lock.
* Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap
- * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
+ * are on the same page e4b->bd_buddy_folio is NULL and return value is 0.
*/
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
@@ -1447,10 +1452,10 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
int block, pnum, poff;
int blocks_per_page;
- struct page *page;
+ struct folio *folio;
- e4b->bd_buddy_page = NULL;
- e4b->bd_bitmap_page = NULL;
+ e4b->bd_buddy_folio = NULL;
+ e4b->bd_bitmap_folio = NULL;
blocks_per_page = PAGE_SIZE / sb->s_blocksize;
/*
@@ -1461,12 +1466,13 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
block = group * 2;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
- page = find_or_create_page(inode->i_mapping, pnum, gfp);
- if (!page)
- return -ENOMEM;
- BUG_ON(page->mapping != inode->i_mapping);
- e4b->bd_bitmap_page = page;
- e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
+ folio = __filemap_get_folio(inode->i_mapping, pnum,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ BUG_ON(folio->mapping != inode->i_mapping);
+ e4b->bd_bitmap_folio = folio;
+ e4b->bd_bitmap = folio_address(folio) + (poff * sb->s_blocksize);
if (blocks_per_page >= 2) {
/* buddy and bitmap are on the same page */
@@ -1474,23 +1480,24 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
}
/* blocks_per_page == 1, hence we need another page for the buddy */
- page = find_or_create_page(inode->i_mapping, block + 1, gfp);
- if (!page)
- return -ENOMEM;
- BUG_ON(page->mapping != inode->i_mapping);
- e4b->bd_buddy_page = page;
+ folio = __filemap_get_folio(inode->i_mapping, block + 1,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
+ BUG_ON(folio->mapping != inode->i_mapping);
+ e4b->bd_buddy_folio = folio;
return 0;
}
static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
{
- if (e4b->bd_bitmap_page) {
- unlock_page(e4b->bd_bitmap_page);
- put_page(e4b->bd_bitmap_page);
+ if (e4b->bd_bitmap_folio) {
+ folio_unlock(e4b->bd_bitmap_folio);
+ folio_put(e4b->bd_bitmap_folio);
}
- if (e4b->bd_buddy_page) {
- unlock_page(e4b->bd_buddy_page);
- put_page(e4b->bd_buddy_page);
+ if (e4b->bd_buddy_folio) {
+ folio_unlock(e4b->bd_buddy_folio);
+ folio_put(e4b->bd_buddy_folio);
}
}
@@ -1505,7 +1512,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
struct ext4_group_info *this_grp;
struct ext4_buddy e4b;
- struct page *page;
+ struct folio *folio;
int ret = 0;
might_sleep();
@@ -1532,16 +1539,16 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
goto err;
}
- page = e4b.bd_bitmap_page;
- ret = ext4_mb_init_cache(page, NULL, gfp);
+ folio = e4b.bd_bitmap_folio;
+ ret = ext4_mb_init_cache(folio, NULL, gfp);
if (ret)
goto err;
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
- if (e4b.bd_buddy_page == NULL) {
+ if (e4b.bd_buddy_folio == NULL) {
/*
* If both the bitmap and buddy are in
* the same page we don't need to force
@@ -1551,11 +1558,11 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
goto err;
}
/* init buddy cache */
- page = e4b.bd_buddy_page;
- ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
+ folio = e4b.bd_buddy_folio;
+ ret = ext4_mb_init_cache(folio, e4b.bd_bitmap, gfp);
if (ret)
goto err;
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
@@ -1577,7 +1584,7 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
int block;
int pnum;
int poff;
- struct page *page;
+ struct folio *folio;
int ret;
struct ext4_group_info *grp;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1595,8 +1602,8 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
e4b->bd_info = grp;
e4b->bd_sb = sb;
e4b->bd_group = group;
- e4b->bd_buddy_page = NULL;
- e4b->bd_bitmap_page = NULL;
+ e4b->bd_buddy_folio = NULL;
+ e4b->bd_bitmap_folio = NULL;
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
/*
@@ -1617,102 +1624,103 @@ ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
- /* we could use find_or_create_page(), but it locks page
- * what we'd like to avoid in fast path ... */
- page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
- if (page == NULL || !PageUptodate(page)) {
- if (page)
+ /* Avoid locking the folio in the fast path ... */
+ folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0);
+ if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
+ if (!IS_ERR(folio))
/*
- * drop the page reference and try
- * to get the page with lock. If we
+ * drop the folio reference and try
+ * to get the folio with lock. If we
* are not uptodate that implies
- * somebody just created the page but
- * is yet to initialize the same. So
+ * somebody just created the folio but
+ * is yet to initialize it. So
* wait for it to initialize.
*/
- put_page(page);
- page = find_or_create_page(inode->i_mapping, pnum, gfp);
- if (page) {
- if (WARN_RATELIMIT(page->mapping != inode->i_mapping,
- "ext4: bitmap's paging->mapping != inode->i_mapping\n")) {
+ folio_put(folio);
+ folio = __filemap_get_folio(inode->i_mapping, pnum,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+ if (!IS_ERR(folio)) {
+ if (WARN_RATELIMIT(folio->mapping != inode->i_mapping,
+ "ext4: bitmap's mapping != inode->i_mapping\n")) {
/* should never happen */
- unlock_page(page);
+ folio_unlock(folio);
ret = -EINVAL;
goto err;
}
- if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, NULL, gfp);
+ if (!folio_test_uptodate(folio)) {
+ ret = ext4_mb_init_cache(folio, NULL, gfp);
if (ret) {
- unlock_page(page);
+ folio_unlock(folio);
goto err;
}
- mb_cmp_bitmaps(e4b, page_address(page) +
+ mb_cmp_bitmaps(e4b, folio_address(folio) +
(poff * sb->s_blocksize));
}
- unlock_page(page);
+ folio_unlock(folio);
}
}
- if (page == NULL) {
- ret = -ENOMEM;
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
goto err;
}
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
- /* Pages marked accessed already */
- e4b->bd_bitmap_page = page;
- e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
+ /* Folios marked accessed already */
+ e4b->bd_bitmap_folio = folio;
+ e4b->bd_bitmap = folio_address(folio) + (poff * sb->s_blocksize);
block++;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
- page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
- if (page == NULL || !PageUptodate(page)) {
- if (page)
- put_page(page);
- page = find_or_create_page(inode->i_mapping, pnum, gfp);
- if (page) {
- if (WARN_RATELIMIT(page->mapping != inode->i_mapping,
- "ext4: buddy bitmap's page->mapping != inode->i_mapping\n")) {
+ folio = __filemap_get_folio(inode->i_mapping, pnum, FGP_ACCESSED, 0);
+ if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
+ if (!IS_ERR(folio))
+ folio_put(folio);
+ folio = __filemap_get_folio(inode->i_mapping, pnum,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp);
+ if (!IS_ERR(folio)) {
+ if (WARN_RATELIMIT(folio->mapping != inode->i_mapping,
+ "ext4: buddy bitmap's mapping != inode->i_mapping\n")) {
/* should never happen */
- unlock_page(page);
+ folio_unlock(folio);
ret = -EINVAL;
goto err;
}
- if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+ if (!folio_test_uptodate(folio)) {
+ ret = ext4_mb_init_cache(folio, e4b->bd_bitmap,
gfp);
if (ret) {
- unlock_page(page);
+ folio_unlock(folio);
goto err;
}
}
- unlock_page(page);
+ folio_unlock(folio);
}
}
- if (page == NULL) {
- ret = -ENOMEM;
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
goto err;
}
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
ret = -EIO;
goto err;
}
- /* Pages marked accessed already */
- e4b->bd_buddy_page = page;
- e4b->bd_buddy = page_address(page) + (poff * sb->s_blocksize);
+ /* Folios marked accessed already */
+ e4b->bd_buddy_folio = folio;
+ e4b->bd_buddy = folio_address(folio) + (poff * sb->s_blocksize);
return 0;
err:
- if (page)
- put_page(page);
- if (e4b->bd_bitmap_page)
- put_page(e4b->bd_bitmap_page);
+ if (!IS_ERR_OR_NULL(folio))
+ folio_put(folio);
+ if (e4b->bd_bitmap_folio)
+ folio_put(e4b->bd_bitmap_folio);
e4b->bd_buddy = NULL;
e4b->bd_bitmap = NULL;
@@ -1727,10 +1735,10 @@ static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
- if (e4b->bd_bitmap_page)
- put_page(e4b->bd_bitmap_page);
- if (e4b->bd_buddy_page)
- put_page(e4b->bd_buddy_page);
+ if (e4b->bd_bitmap_folio)
+ folio_put(e4b->bd_bitmap_folio);
+ if (e4b->bd_buddy_folio)
+ folio_put(e4b->bd_buddy_folio);
}
@@ -2040,13 +2048,12 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
int ord;
int mlen = 0;
int max = 0;
- int cur;
int start = ex->fe_start;
int len = ex->fe_len;
unsigned ret = 0;
int len0 = len;
void *buddy;
- bool split = false;
+ int ord_start, ord_end;
BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3));
BUG_ON(e4b->bd_group != ex->fe_group);
@@ -2071,16 +2078,12 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
/* let's maintain buddy itself */
while (len) {
- if (!split)
- ord = mb_find_order_for_block(e4b, start);
+ ord = mb_find_order_for_block(e4b, start);
if (((start >> ord) << ord) == start && len >= (1 << ord)) {
/* the whole chunk may be allocated at once! */
mlen = 1 << ord;
- if (!split)
- buddy = mb_find_buddy(e4b, ord, &max);
- else
- split = false;
+ buddy = mb_find_buddy(e4b, ord, &max);
BUG_ON((start >> ord) >= max);
mb_set_bit(start >> ord, buddy);
e4b->bd_info->bb_counters[ord]--;
@@ -2094,20 +2097,29 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex)
if (ret == 0)
ret = len | (ord << 16);
- /* we have to split large buddy */
BUG_ON(ord <= 0);
buddy = mb_find_buddy(e4b, ord, &max);
mb_set_bit(start >> ord, buddy);
e4b->bd_info->bb_counters[ord]--;
- ord--;
- cur = (start >> ord) & ~1U;
- buddy = mb_find_buddy(e4b, ord, &max);
- mb_clear_bit(cur, buddy);
- mb_clear_bit(cur + 1, buddy);
- e4b->bd_info->bb_counters[ord]++;
- e4b->bd_info->bb_counters[ord]++;
- split = true;
+ ord_start = (start >> ord) << ord;
+ ord_end = ord_start + (1 << ord);
+ /* first chunk */
+ if (start > ord_start)
+ ext4_mb_mark_free_simple(e4b->bd_sb, e4b->bd_buddy,
+ ord_start, start - ord_start,
+ e4b->bd_info);
+
+ /* last chunk */
+ if (start + len < ord_end) {
+ ext4_mb_mark_free_simple(e4b->bd_sb, e4b->bd_buddy,
+ start + len,
+ ord_end - (start + len),
+ e4b->bd_info);
+ break;
+ }
+ len = start + len - ord_end;
+ start = ord_end;
}
mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info);
@@ -2149,10 +2161,10 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
* double allocate blocks. The reference is dropped
* in ext4_mb_release_context
*/
- ac->ac_bitmap_page = e4b->bd_bitmap_page;
- get_page(ac->ac_bitmap_page);
- ac->ac_buddy_page = e4b->bd_buddy_page;
- get_page(ac->ac_buddy_page);
+ ac->ac_bitmap_folio = e4b->bd_bitmap_folio;
+ folio_get(ac->ac_bitmap_folio);
+ ac->ac_buddy_folio = e4b->bd_buddy_folio;
+ folio_get(ac->ac_buddy_folio);
/* store last allocated for subsequent stream allocation */
if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
spin_lock(&sbi->s_md_lock);
@@ -2675,7 +2687,7 @@ static int ext4_mb_good_group_nolock(struct ext4_allocation_context *ac,
int ret;
/*
- * cr=CR_POWER2_ALIGNED/CR_GOAL_LEN_FAST is a very optimistic
+ * CR_POWER2_ALIGNED/CR_GOAL_LEN_FAST is a very optimistic
* search to find large good chunks almost for free. If buddy
* data is not ready, then this optimization makes no sense. But
* we never skip the first block group in a flex_bg, since this
@@ -2856,6 +2868,7 @@ repeat:
group = ac->ac_g_ex.fe_group;
ac->ac_groups_linear_remaining = sbi->s_mb_max_linear_groups;
prefetch_grp = group;
+ nr = 0;
for (i = 0, new_cr = cr; i < ngroups; i++,
ext4_mb_choose_next_group(ac, &new_cr, &group, ngroups)) {
@@ -3186,7 +3199,6 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
}
static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos)
-__acquires(&EXT4_SB(sb)->s_mb_rb_lock)
{
struct super_block *sb = pde_data(file_inode(seq->file));
unsigned long position;
@@ -3440,10 +3452,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
}
if (sbi->s_mb_prefetch > ext4_get_groups_count(sb))
sbi->s_mb_prefetch = ext4_get_groups_count(sb);
- /* now many real IOs to prefetch within a single allocation at cr=0
- * given cr=0 is an CPU-related optimization we shouldn't try to
- * load too many groups, at some point we should start to use what
- * we've got in memory.
+ /*
+ * how many real IOs to prefetch within a single allocation at
+ * CR_POWER2_ALIGNED. Given CR_POWER2_ALIGNED is a CPU-related
+ * optimization we shouldn't try to load too many groups, at some point
+ * we should start to use what we've got in memory.
* with an average random access time 5ms, it'd take a second to get
* 200 groups (* N with flex_bg), so let's make this limit 4
*/
@@ -3884,8 +3897,8 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
/* No more items in the per group rb tree
* balance refcounts from ext4_mb_free_metadata()
*/
- put_page(e4b.bd_buddy_page);
- put_page(e4b.bd_bitmap_page);
+ folio_put(e4b.bd_buddy_folio);
+ folio_put(e4b.bd_bitmap_folio);
}
ext4_unlock_group(sb, entry->efd_group);
ext4_mb_unload_buddy(&e4b);
@@ -5989,10 +6002,10 @@ static void ext4_mb_release_context(struct ext4_allocation_context *ac)
ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
- if (ac->ac_bitmap_page)
- put_page(ac->ac_bitmap_page);
- if (ac->ac_buddy_page)
- put_page(ac->ac_buddy_page);
+ if (ac->ac_bitmap_folio)
+ folio_put(ac->ac_bitmap_folio);
+ if (ac->ac_buddy_folio)
+ folio_put(ac->ac_buddy_folio);
if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
mutex_unlock(&ac->ac_lg->lg_mutex);
ext4_mb_collect_stats(ac);
@@ -6113,6 +6126,7 @@ ext4_mb_new_blocks_simple(struct ext4_allocation_request *ar, int *errp)
ext4_mb_mark_bb(sb, block, 1, true);
ar->len = 1;
+ *errp = 0;
return block;
}
@@ -6307,8 +6321,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
struct rb_node *parent = NULL, *new_node;
BUG_ON(!ext4_handle_valid(handle));
- BUG_ON(e4b->bd_bitmap_page == NULL);
- BUG_ON(e4b->bd_buddy_page == NULL);
+ BUG_ON(e4b->bd_bitmap_folio == NULL);
+ BUG_ON(e4b->bd_buddy_folio == NULL);
new_node = &new_entry->efd_node;
cluster = new_entry->efd_start_cluster;
@@ -6319,8 +6333,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* otherwise we'll refresh it from
* on-disk bitmap and lose not-yet-available
* blocks */
- get_page(e4b->bd_buddy_page);
- get_page(e4b->bd_bitmap_page);
+ folio_get(e4b->bd_buddy_folio);
+ folio_get(e4b->bd_bitmap_folio);
}
while (*n) {
parent = *n;
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 56938532b4ce..d8553f1498d3 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -187,14 +187,14 @@ struct ext4_allocation_context {
struct ext4_free_extent ac_f_ex;
/*
- * goal len can change in CR1.5, so save the original len. This is
- * used while adjusting the PA window and for accounting.
+ * goal len can change in CR_BEST_AVAIL_LEN, so save the original len.
+ * This is used while adjusting the PA window and for accounting.
*/
ext4_grpblk_t ac_orig_goal_len;
__u32 ac_flags; /* allocation hints */
+ __u32 ac_groups_linear_remaining;
__u16 ac_groups_scanned;
- __u16 ac_groups_linear_remaining;
__u16 ac_found;
__u16 ac_cX_found[EXT4_MB_NUM_CRS];
__u16 ac_tail;
@@ -204,8 +204,8 @@ struct ext4_allocation_context {
__u8 ac_2order; /* if request is to allocate 2^N blocks and
* N > 0, the field stores N, otherwise 0 */
__u8 ac_op; /* operation, for history only */
- struct page *ac_bitmap_page;
- struct page *ac_buddy_page;
+ struct folio *ac_bitmap_folio;
+ struct folio *ac_buddy_folio;
struct ext4_prealloc_space *ac_pa;
struct ext4_locality_group *ac_lg;
};
@@ -215,9 +215,9 @@ struct ext4_allocation_context {
#define AC_STATUS_BREAK 3
struct ext4_buddy {
- struct page *bd_buddy_page;
+ struct folio *bd_buddy_folio;
void *bd_buddy;
- struct page *bd_bitmap_page;
+ struct folio *bd_bitmap_folio;
void *bd_bitmap;
struct ext4_group_info *bd_info;
struct super_block *bd_sb;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 7cd4afa4de1d..204f53b23622 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -199,10 +199,8 @@ mext_page_mkuptodate(struct folio *folio, unsigned from, unsigned to)
continue;
if (!buffer_mapped(bh)) {
err = ext4_get_block(inode, block, bh, 0);
- if (err) {
- folio_set_error(folio);
+ if (err)
return err;
- }
if (!buffer_mapped(bh)) {
folio_zero_range(folio, block_start, blocksize);
set_buffer_uptodate(bh);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5e4f65c14dfb..a630b27a4cc6 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2897,7 +2897,7 @@ retry:
inode = ext4_new_inode_start_handle(idmap, dir, mode,
NULL, 0, NULL,
EXT4_HT_DIR,
- EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
+ EXT4_MAXQUOTAS_TRANS_BLOCKS(dir->i_sb) +
4 + EXT4_XATTR_TRANS_BLOCKS);
handle = ext4_journal_current_handle();
err = PTR_ERR(inode);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 312bc6813357..ad5543866d21 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -117,7 +117,6 @@ static void ext4_finish_bio(struct bio *bio)
if (bio->bi_status) {
int err = blk_status_to_errno(bio->bi_status);
- folio_set_error(folio);
mapping_set_error(folio->mapping, err);
}
bh = head = folio_buffers(folio);
@@ -441,8 +440,6 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio,
BUG_ON(!folio_test_locked(folio));
BUG_ON(folio_test_writeback(folio));
- folio_clear_error(folio);
-
/*
* Comments copied from block_write_full_folio:
*
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 21e8f0aebb3c..8494492582ab 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -289,7 +289,6 @@ int ext4_mpage_readpages(struct inode *inode,
if (ext4_map_blocks(NULL, inode, &map, 0) < 0) {
set_error_page:
- folio_set_error(folio);
folio_zero_segment(folio, 0,
folio_size(folio));
folio_unlock(folio);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3fce1b80c419..893ab80dafba 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2074,8 +2074,7 @@ static int unnote_qf_name(struct fs_context *fc, int qtype)
{
struct ext4_fs_context *ctx = fc->fs_private;
- if (ctx->s_qf_names[qtype])
- kfree(ctx->s_qf_names[qtype]);
+ kfree(ctx->s_qf_names[qtype]);
ctx->s_qf_names[qtype] = NULL;
ctx->qname_spec |= 1 << qtype;
@@ -2480,8 +2479,7 @@ static int parse_options(struct fs_context *fc, char *options)
param.size = v_len;
ret = ext4_parse_param(fc, &param);
- if (param.string)
- kfree(param.string);
+ kfree(param.string);
if (ret < 0)
return ret;
}
@@ -5338,6 +5336,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
super_set_uuid(sb, es->s_uuid, sizeof(es->s_uuid));
+ super_set_sysfs_name_bdev(sb);
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
mutex_init(&sbi->s_orphan_lock);
@@ -5547,19 +5546,15 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (err)
goto failed_mount6;
- err = ext4_register_sysfs(sb);
- if (err)
- goto failed_mount7;
-
err = ext4_init_orphan_info(sb);
if (err)
- goto failed_mount8;
+ goto failed_mount7;
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
err = ext4_enable_quotas(sb);
if (err)
- goto failed_mount9;
+ goto failed_mount8;
}
#endif /* CONFIG_QUOTA */
@@ -5585,7 +5580,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
ext4_msg(sb, KERN_INFO, "recovery complete");
err = ext4_mark_recovery_complete(sb, es);
if (err)
- goto failed_mount10;
+ goto failed_mount9;
}
if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
@@ -5602,15 +5597,17 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
atomic_set(&sbi->s_warning_count, 0);
atomic_set(&sbi->s_msg_count, 0);
+ /* Register sysfs after all initializations are complete. */
+ err = ext4_register_sysfs(sb);
+ if (err)
+ goto failed_mount9;
+
return 0;
-failed_mount10:
+failed_mount9:
ext4_quotas_off(sb, EXT4_MAXQUOTAS);
-failed_mount9: __maybe_unused
+failed_mount8: __maybe_unused
ext4_release_orphan_info(sb);
-failed_mount8:
- ext4_unregister_sysfs(sb);
- kobject_put(&sbi->s_kobj);
failed_mount7:
ext4_unregister_li_request(sb);
failed_mount6:
@@ -6126,8 +6123,8 @@ static void ext4_update_super(struct super_block *sb)
__ext4_update_tstamp(&es->s_first_error_time,
&es->s_first_error_time_hi,
sbi->s_first_error_time);
- strncpy(es->s_first_error_func, sbi->s_first_error_func,
- sizeof(es->s_first_error_func));
+ strtomem_pad(es->s_first_error_func,
+ sbi->s_first_error_func, 0);
es->s_first_error_line =
cpu_to_le32(sbi->s_first_error_line);
es->s_first_error_ino =
@@ -6140,8 +6137,7 @@ static void ext4_update_super(struct super_block *sb)
__ext4_update_tstamp(&es->s_last_error_time,
&es->s_last_error_time_hi,
sbi->s_last_error_time);
- strncpy(es->s_last_error_func, sbi->s_last_error_func,
- sizeof(es->s_last_error_func));
+ strtomem_pad(es->s_last_error_func, sbi->s_last_error_func, 0);
es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 6d332dff79dd..ddb54608ca2e 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -29,7 +29,10 @@ typedef enum {
attr_trigger_test_error,
attr_first_error_time,
attr_last_error_time,
+ attr_clusters_in_group,
+ attr_mb_order,
attr_feature,
+ attr_pointer_pi,
attr_pointer_ui,
attr_pointer_ul,
attr_pointer_u64,
@@ -104,7 +107,7 @@ static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi,
int ret;
ret = kstrtoull(skip_spaces(buf), 0, &val);
- if (ret || val >= clusters)
+ if (ret || val >= clusters || (s64)val < 0)
return -EINVAL;
atomic64_set(&sbi->s_resv_clusters, val);
@@ -178,6 +181,9 @@ static struct ext4_attr ext4_attr_##_name = { \
#define EXT4_RO_ATTR_ES_STRING(_name,_elname,_size) \
EXT4_ATTR_STRING(_name, 0444, _size, ext4_super_block, _elname)
+#define EXT4_RW_ATTR_SBI_PI(_name,_elname) \
+ EXT4_ATTR_OFFSET(_name, 0644, pointer_pi, ext4_sb_info, _elname)
+
#define EXT4_RW_ATTR_SBI_UI(_name,_elname) \
EXT4_ATTR_OFFSET(_name, 0644, pointer_ui, ext4_sb_info, _elname)
@@ -207,23 +213,25 @@ EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444);
EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
ext4_sb_info, s_inode_readahead_blks);
+EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group,
+ ext4_sb_info, s_mb_group_prealloc);
+EXT4_ATTR_OFFSET(mb_best_avail_max_trim_order, 0644, mb_order,
+ ext4_sb_info, s_mb_best_avail_max_trim_order);
EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal);
EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats);
EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);
EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
-EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups);
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error);
-EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
-EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst);
-EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
-EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
-EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
-EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
-EXT4_RW_ATTR_SBI_UI(mb_best_avail_max_trim_order, s_mb_best_avail_max_trim_order);
+EXT4_RW_ATTR_SBI_PI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval);
+EXT4_RW_ATTR_SBI_PI(err_ratelimit_burst, s_err_ratelimit_state.burst);
+EXT4_RW_ATTR_SBI_PI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval);
+EXT4_RW_ATTR_SBI_PI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
+EXT4_RW_ATTR_SBI_PI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
+EXT4_RW_ATTR_SBI_PI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
#ifdef CONFIG_EXT4_DEBUG
EXT4_RW_ATTR_SBI_UL(simulate_fail, s_simulate_fail);
#endif
@@ -366,13 +374,45 @@ static ssize_t __print_tstamp(char *buf, __le32 lo, __u8 hi)
#define print_tstamp(buf, es, tstamp) \
__print_tstamp(buf, (es)->tstamp, (es)->tstamp ## _hi)
+static ssize_t ext4_generic_attr_show(struct ext4_attr *a,
+ struct ext4_sb_info *sbi, char *buf)
+{
+ void *ptr = calc_ptr(a, sbi);
+
+ if (!ptr)
+ return 0;
+
+ switch (a->attr_id) {
+ case attr_inode_readahead:
+ case attr_clusters_in_group:
+ case attr_mb_order:
+ case attr_pointer_pi:
+ case attr_pointer_ui:
+ if (a->attr_ptr == ptr_ext4_super_block_offset)
+ return sysfs_emit(buf, "%u\n", le32_to_cpup(ptr));
+ return sysfs_emit(buf, "%u\n", *((unsigned int *) ptr));
+ case attr_pointer_ul:
+ return sysfs_emit(buf, "%lu\n", *((unsigned long *) ptr));
+ case attr_pointer_u8:
+ return sysfs_emit(buf, "%u\n", *((unsigned char *) ptr));
+ case attr_pointer_u64:
+ if (a->attr_ptr == ptr_ext4_super_block_offset)
+ return sysfs_emit(buf, "%llu\n", le64_to_cpup(ptr));
+ return sysfs_emit(buf, "%llu\n", *((unsigned long long *) ptr));
+ case attr_pointer_string:
+ return sysfs_emit(buf, "%.*s\n", a->attr_size, (char *) ptr);
+ case attr_pointer_atomic:
+ return sysfs_emit(buf, "%d\n", atomic_read((atomic_t *) ptr));
+ }
+ return 0;
+}
+
static ssize_t ext4_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
s_kobj);
struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
- void *ptr = calc_ptr(a, sbi);
switch (a->attr_id) {
case attr_delayed_allocation_blocks:
@@ -391,45 +431,6 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
return sysfs_emit(buf, "%llu\n",
(unsigned long long)
percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit));
- case attr_inode_readahead:
- case attr_pointer_ui:
- if (!ptr)
- return 0;
- if (a->attr_ptr == ptr_ext4_super_block_offset)
- return sysfs_emit(buf, "%u\n",
- le32_to_cpup(ptr));
- else
- return sysfs_emit(buf, "%u\n",
- *((unsigned int *) ptr));
- case attr_pointer_ul:
- if (!ptr)
- return 0;
- return sysfs_emit(buf, "%lu\n",
- *((unsigned long *) ptr));
- case attr_pointer_u8:
- if (!ptr)
- return 0;
- return sysfs_emit(buf, "%u\n",
- *((unsigned char *) ptr));
- case attr_pointer_u64:
- if (!ptr)
- return 0;
- if (a->attr_ptr == ptr_ext4_super_block_offset)
- return sysfs_emit(buf, "%llu\n",
- le64_to_cpup(ptr));
- else
- return sysfs_emit(buf, "%llu\n",
- *((unsigned long long *) ptr));
- case attr_pointer_string:
- if (!ptr)
- return 0;
- return sysfs_emit(buf, "%.*s\n", a->attr_size,
- (char *) ptr);
- case attr_pointer_atomic:
- if (!ptr)
- return 0;
- return sysfs_emit(buf, "%d\n",
- atomic_read((atomic_t *) ptr));
case attr_feature:
return sysfs_emit(buf, "supported\n");
case attr_first_error_time:
@@ -438,29 +439,34 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
return print_tstamp(buf, sbi->s_es, s_last_error_time);
case attr_journal_task:
return journal_task_show(sbi, buf);
+ default:
+ return ext4_generic_attr_show(a, sbi, buf);
}
-
- return 0;
}
-static ssize_t ext4_attr_store(struct kobject *kobj,
- struct attribute *attr,
- const char *buf, size_t len)
+static ssize_t ext4_generic_attr_store(struct ext4_attr *a,
+ struct ext4_sb_info *sbi,
+ const char *buf, size_t len)
{
- struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
- s_kobj);
- struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
- void *ptr = calc_ptr(a, sbi);
- unsigned long t;
int ret;
+ unsigned int t;
+ unsigned long lt;
+ void *ptr = calc_ptr(a, sbi);
+
+ if (!ptr)
+ return 0;
switch (a->attr_id) {
- case attr_reserved_clusters:
- return reserved_clusters_store(sbi, buf, len);
+ case attr_pointer_pi:
+ ret = kstrtouint(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
+ if ((int)t < 0)
+ return -EINVAL;
+ *((unsigned int *) ptr) = t;
+ return len;
case attr_pointer_ui:
- if (!ptr)
- return 0;
- ret = kstrtoul(skip_spaces(buf), 0, &t);
+ ret = kstrtouint(skip_spaces(buf), 0, &t);
if (ret)
return ret;
if (a->attr_ptr == ptr_ext4_super_block_offset)
@@ -468,20 +474,50 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
else
*((unsigned int *) ptr) = t;
return len;
+ case attr_mb_order:
+ ret = kstrtouint(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
+ if (t > 64)
+ return -EINVAL;
+ *((unsigned int *) ptr) = t;
+ return len;
+ case attr_clusters_in_group:
+ ret = kstrtouint(skip_spaces(buf), 0, &t);
+ if (ret)
+ return ret;
+ if (t > sbi->s_clusters_per_group)
+ return -EINVAL;
+ *((unsigned int *) ptr) = t;
+ return len;
case attr_pointer_ul:
- if (!ptr)
- return 0;
- ret = kstrtoul(skip_spaces(buf), 0, &t);
+ ret = kstrtoul(skip_spaces(buf), 0, &lt);
if (ret)
return ret;
- *((unsigned long *) ptr) = t;
+ *((unsigned long *) ptr) = lt;
return len;
+ }
+ return 0;
+}
+
+static ssize_t ext4_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf, size_t len)
+{
+ struct ext4_sb_info *sbi = container_of(kobj, struct ext4_sb_info,
+ s_kobj);
+ struct ext4_attr *a = container_of(attr, struct ext4_attr, attr);
+
+ switch (a->attr_id) {
+ case attr_reserved_clusters:
+ return reserved_clusters_store(sbi, buf, len);
case attr_inode_readahead:
return inode_readahead_blks_store(sbi, buf, len);
case attr_trigger_test_error:
return trigger_test_error(sbi, buf, len);
+ default:
+ return ext4_generic_attr_store(a, sbi, buf, len);
}
- return 0;
}
static void ext4_sb_release(struct kobject *kobj)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b67a176bfcf9..6460879b9fcb 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1619,6 +1619,7 @@ out_err:
static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
struct ext4_xattr_search *s,
handle_t *handle, struct inode *inode,
+ struct inode *new_ea_inode,
bool is_block)
{
struct ext4_xattr_entry *last, *next;
@@ -1626,7 +1627,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
size_t min_offs = s->end - s->base, name_len = strlen(i->name);
int in_inode = i->in_inode;
struct inode *old_ea_inode = NULL;
- struct inode *new_ea_inode = NULL;
size_t old_size, new_size;
int ret;
@@ -1711,38 +1711,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
old_ea_inode = NULL;
goto out;
}
- }
- if (i->value && in_inode) {
- WARN_ON_ONCE(!i->value_len);
-
- new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
- i->value, i->value_len);
- if (IS_ERR(new_ea_inode)) {
- ret = PTR_ERR(new_ea_inode);
- new_ea_inode = NULL;
- goto out;
- }
- }
- if (old_ea_inode) {
/* We are ready to release ref count on the old_ea_inode. */
ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode);
- if (ret) {
- /* Release newly required ref count on new_ea_inode. */
- if (new_ea_inode) {
- int err;
-
- err = ext4_xattr_inode_dec_ref(handle,
- new_ea_inode);
- if (err)
- ext4_warning_inode(new_ea_inode,
- "dec ref new_ea_inode err=%d",
- err);
- ext4_xattr_inode_free_quota(inode, new_ea_inode,
- i->value_len);
- }
+ if (ret)
goto out;
- }
ext4_xattr_inode_free_quota(inode, old_ea_inode,
le32_to_cpu(here->e_value_size));
@@ -1866,7 +1839,6 @@ update_hash:
ret = 0;
out:
iput(old_ea_inode);
- iput(new_ea_inode);
return ret;
}
@@ -1929,9 +1901,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
size_t old_ea_inode_quota = 0;
unsigned int ea_ino;
-
#define header(x) ((struct ext4_xattr_header *)(x))
+ /* If we need EA inode, prepare it before locking the buffer */
+ if (i->value && i->in_inode) {
+ WARN_ON_ONCE(!i->value_len);
+
+ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
+ i->value, i->value_len);
+ if (IS_ERR(ea_inode)) {
+ error = PTR_ERR(ea_inode);
+ ea_inode = NULL;
+ goto cleanup;
+ }
+ }
+
if (s->base) {
int offset = (char *)s->here - bs->bh->b_data;
@@ -1940,6 +1924,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
EXT4_JTR_NONE);
if (error)
goto cleanup;
+
lock_buffer(bs->bh);
if (header(s->base)->h_refcount == cpu_to_le32(1)) {
@@ -1966,7 +1951,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
}
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
- true /* is_block */);
+ ea_inode, true /* is_block */);
ext4_xattr_block_csum_set(inode, bs->bh);
unlock_buffer(bs->bh);
if (error == -EFSCORRUPTED)
@@ -2034,33 +2019,22 @@ clone_block:
s->end = s->base + sb->s_blocksize;
}
- error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */);
+ error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
+ true /* is_block */);
if (error == -EFSCORRUPTED)
goto bad_block;
if (error)
goto cleanup;
- if (i->value && s->here->e_value_inum) {
- /*
- * A ref count on ea_inode has been taken as part of the call to
- * ext4_xattr_set_entry() above. We would like to drop this
- * extra ref but we have to wait until the xattr block is
- * initialized and has its own ref count on the ea_inode.
- */
- ea_ino = le32_to_cpu(s->here->e_value_inum);
- error = ext4_xattr_inode_iget(inode, ea_ino,
- le32_to_cpu(s->here->e_hash),
- &ea_inode);
- if (error) {
- ea_inode = NULL;
+inserted:
+ if (!IS_LAST_ENTRY(s->first)) {
+ new_bh = ext4_xattr_block_cache_find(inode, header(s->base), &ce);
+ if (IS_ERR(new_bh)) {
+ error = PTR_ERR(new_bh);
+ new_bh = NULL;
goto cleanup;
}
- }
-inserted:
- if (!IS_LAST_ENTRY(s->first)) {
- new_bh = ext4_xattr_block_cache_find(inode, header(s->base),
- &ce);
if (new_bh) {
/* We found an identical block in the cache. */
if (new_bh == bs->bh)
@@ -2158,6 +2132,17 @@ getblk_failed:
ENTRY(header(s->base)+1));
if (error)
goto getblk_failed;
+ if (ea_inode) {
+ /* Drop the extra ref on ea_inode. */
+ error = ext4_xattr_inode_dec_ref(handle,
+ ea_inode);
+ if (error)
+ ext4_warning_inode(ea_inode,
+ "dec ref error=%d",
+ error);
+ iput(ea_inode);
+ ea_inode = NULL;
+ }
lock_buffer(new_bh);
error = ext4_journal_get_create_access(handle, sb,
@@ -2198,17 +2183,16 @@ getblk_failed:
cleanup:
if (ea_inode) {
- int error2;
-
- error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
- if (error2)
- ext4_warning_inode(ea_inode, "dec ref error=%d",
- error2);
+ if (error) {
+ int error2;
- /* If there was an error, revert the quota charge. */
- if (error)
+ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (error2)
+ ext4_warning_inode(ea_inode, "dec ref error=%d",
+ error2);
ext4_xattr_inode_free_quota(inode, ea_inode,
i_size_read(ea_inode));
+ }
iput(ea_inode);
}
if (ce)
@@ -2266,14 +2250,38 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
{
struct ext4_xattr_ibody_header *header;
struct ext4_xattr_search *s = &is->s;
+ struct inode *ea_inode = NULL;
int error;
if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
return -ENOSPC;
- error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
- if (error)
+ /* If we need EA inode, prepare it before locking the buffer */
+ if (i->value && i->in_inode) {
+ WARN_ON_ONCE(!i->value_len);
+
+ ea_inode = ext4_xattr_inode_lookup_create(handle, inode,
+ i->value, i->value_len);
+ if (IS_ERR(ea_inode))
+ return PTR_ERR(ea_inode);
+ }
+ error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode,
+ false /* is_block */);
+ if (error) {
+ if (ea_inode) {
+ int error2;
+
+ error2 = ext4_xattr_inode_dec_ref(handle, ea_inode);
+ if (error2)
+ ext4_warning_inode(ea_inode, "dec ref error=%d",
+ error2);
+
+ ext4_xattr_inode_free_quota(inode, ea_inode,
+ i_size_read(ea_inode));
+ iput(ea_inode);
+ }
return error;
+ }
header = IHDR(inode, ext4_raw_inode(&is->iloc));
if (!IS_LAST_ENTRY(s->first)) {
header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
@@ -2282,6 +2290,7 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
header->h_magic = cpu_to_le32(0);
ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
}
+ iput(ea_inode);
return 0;
}
@@ -3090,8 +3099,8 @@ ext4_xattr_cmp(struct ext4_xattr_header *header1,
*
* Find an identical extended attribute block.
*
- * Returns a pointer to the block found, or NULL if such a block was
- * not found or an error occurred.
+ * Returns a pointer to the block found, or NULL if such a block was not
+ * found, or an error pointer if an error occurred while reading ea block.
*/
static struct buffer_head *
ext4_xattr_block_cache_find(struct inode *inode,
@@ -3113,11 +3122,11 @@ ext4_xattr_block_cache_find(struct inode *inode,
bh = ext4_sb_bread(inode->i_sb, ce->e_value, REQ_PRIO);
if (IS_ERR(bh)) {
- if (PTR_ERR(bh) == -ENOMEM)
- return NULL;
- bh = NULL;
- EXT4_ERROR_INODE(inode, "block %lu read error",
- (unsigned long)ce->e_value);
+ if (PTR_ERR(bh) != -ENOMEM)
+ EXT4_ERROR_INODE(inode, "block %lu read error",
+ (unsigned long)ce->e_value);
+ mb_cache_entry_put(ea_block_cache, ce);
+ return bh;
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
*pce = ce;
return bh;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index fb0628e680c4..64776891120c 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -796,6 +796,9 @@ static int ioctl_get_fs_sysfs_path(struct file *file, void __user *argp)
*
* When you add any new common ioctls to the switches above and below,
* please ensure they have compatible arguments in compat mode.
+ *
+ * The LSM mailing list should also be notified of any command additions or
+ * changes, as specific LSMs may be affected.
*/
static int do_vfs_ioctl(struct file *filp, unsigned int fd,
unsigned int cmd, unsigned long arg)
diff --git a/fs/iomap/Makefile b/fs/iomap/Makefile
index fc070184b7fa..381d76c5c232 100644
--- a/fs/iomap/Makefile
+++ b/fs/iomap/Makefile
@@ -4,7 +4,7 @@
# All Rights Reserved.
#
-ccflags-y += -I $(srctree)/$(src) # needed for trace events
+ccflags-y += -I $(src) # needed for trace events
obj-$(CONFIG_FS_IOMAP) += iomap.o
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1c97e64c4784..951f78634adf 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -337,8 +337,6 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* Checkpoint list management */
-enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
-
/*
* journal_shrink_one_cp_list
*
@@ -350,7 +348,7 @@ enum shrink_type {SHRINK_DESTROY, SHRINK_BUSY_STOP, SHRINK_BUSY_SKIP};
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
- enum shrink_type type,
+ enum jbd2_shrink_type type,
bool *released)
{
struct journal_head *last_jh;
@@ -367,12 +365,12 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
jh = next_jh;
next_jh = jh->b_cpnext;
- if (type == SHRINK_DESTROY) {
+ if (type == JBD2_SHRINK_DESTROY) {
ret = __jbd2_journal_remove_checkpoint(jh);
} else {
ret = jbd2_journal_try_remove_checkpoint(jh);
if (ret < 0) {
- if (type == SHRINK_BUSY_SKIP)
+ if (type == JBD2_SHRINK_BUSY_SKIP)
continue;
break;
}
@@ -439,7 +437,7 @@ again:
tid = transaction->t_tid;
freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
- SHRINK_BUSY_SKIP, &released);
+ JBD2_SHRINK_BUSY_SKIP, &released);
nr_freed += freed;
(*nr_to_scan) -= min(*nr_to_scan, freed);
if (*nr_to_scan == 0)
@@ -472,21 +470,25 @@ out:
* journal_clean_checkpoint_list
*
* Find all the written-back checkpoint buffers in the journal and release them.
- * If 'destroy' is set, release all buffers unconditionally.
+ * If 'type' is JBD2_SHRINK_DESTROY, release all buffers unconditionally. If
+ * 'type' is JBD2_SHRINK_BUSY_STOP, will stop release buffers if encounters a
+ * busy buffer. To avoid wasting CPU cycles scanning the buffer list in some
+ * cases, don't pass JBD2_SHRINK_BUSY_SKIP 'type' for this function.
*
* Called with j_list_lock held.
*/
-void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal,
+ enum jbd2_shrink_type type)
{
transaction_t *transaction, *last_transaction, *next_transaction;
- enum shrink_type type;
bool released;
+ WARN_ON_ONCE(type == JBD2_SHRINK_BUSY_SKIP);
+
transaction = journal->j_checkpoint_transactions;
if (!transaction)
return;
- type = destroy ? SHRINK_DESTROY : SHRINK_BUSY_STOP;
last_transaction = transaction->t_cpprev;
next_transaction = transaction;
do {
@@ -527,7 +529,7 @@ void jbd2_journal_destroy_checkpoint(journal_t *journal)
spin_unlock(&journal->j_list_lock);
break;
}
- __jbd2_journal_clean_checkpoint_list(journal, true);
+ __jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_DESTROY);
spin_unlock(&journal->j_list_lock);
cond_resched();
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 5e122586e06e..75ea4e9a5cab 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -501,7 +501,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* frees some memory
*/
spin_lock(&journal->j_list_lock);
- __jbd2_journal_clean_checkpoint_list(journal, false);
+ __jbd2_journal_clean_checkpoint_list(journal, JBD2_SHRINK_BUSY_STOP);
spin_unlock(&journal->j_list_lock);
jbd2_debug(3, "JBD2: commit phase 1\n");
@@ -571,7 +571,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
J_ASSERT(commit_transaction->t_nr_buffers <=
atomic_read(&commit_transaction->t_outstanding_credits));
- err = 0;
bufs = 0;
descriptor = NULL;
while (commit_transaction->t_buffers) {
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 127a728fcbc8..c11516801784 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -117,7 +117,6 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
if (nsm != NULL)
refcount_inc(&nsm->sm_count);
else {
- host = NULL;
nsm = nsm_get_handle(ni->net, ni->sap, ni->salen,
ni->hostname, ni->hostname_len);
if (unlikely(nsm == NULL)) {
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 7b641095a665..50b3135d07ac 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -334,21 +334,25 @@ static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
static int export_stats_init(struct export_stats *stats)
{
stats->start_time = ktime_get_seconds();
- return nfsd_percpu_counters_init(stats->counter, EXP_STATS_COUNTERS_NUM);
+ return percpu_counter_init_many(stats->counter, 0, GFP_KERNEL,
+ EXP_STATS_COUNTERS_NUM);
}
static void export_stats_reset(struct export_stats *stats)
{
- if (stats)
- nfsd_percpu_counters_reset(stats->counter,
- EXP_STATS_COUNTERS_NUM);
+ if (stats) {
+ int i;
+
+ for (i = 0; i < EXP_STATS_COUNTERS_NUM; i++)
+ percpu_counter_set(&stats->counter[i], 0);
+ }
}
static void export_stats_destroy(struct export_stats *stats)
{
if (stats)
- nfsd_percpu_counters_destroy(stats->counter,
- EXP_STATS_COUNTERS_NUM);
+ percpu_counter_destroy_many(stats->counter,
+ EXP_STATS_COUNTERS_NUM);
}
static void svc_export_put(struct kref *ref)
diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c
index 0e1d635ec5f9..62d2586d9902 100644
--- a/fs/nfsd/netlink.c
+++ b/fs/nfsd/netlink.c
@@ -10,6 +10,36 @@
#include <uapi/linux/nfsd_netlink.h>
+/* Common nested types */
+const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1] = {
+ [NFSD_A_SOCK_ADDR] = { .type = NLA_BINARY, },
+ [NFSD_A_SOCK_TRANSPORT_NAME] = { .type = NLA_NUL_STRING, },
+};
+
+const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1] = {
+ [NFSD_A_VERSION_MAJOR] = { .type = NLA_U32, },
+ [NFSD_A_VERSION_MINOR] = { .type = NLA_U32, },
+ [NFSD_A_VERSION_ENABLED] = { .type = NLA_FLAG, },
+};
+
+/* NFSD_CMD_THREADS_SET - do */
+static const struct nla_policy nfsd_threads_set_nl_policy[NFSD_A_SERVER_SCOPE + 1] = {
+ [NFSD_A_SERVER_THREADS] = { .type = NLA_U32, },
+ [NFSD_A_SERVER_GRACETIME] = { .type = NLA_U32, },
+ [NFSD_A_SERVER_LEASETIME] = { .type = NLA_U32, },
+ [NFSD_A_SERVER_SCOPE] = { .type = NLA_NUL_STRING, },
+};
+
+/* NFSD_CMD_VERSION_SET - do */
+static const struct nla_policy nfsd_version_set_nl_policy[NFSD_A_SERVER_PROTO_VERSION + 1] = {
+ [NFSD_A_SERVER_PROTO_VERSION] = NLA_POLICY_NESTED(nfsd_version_nl_policy),
+};
+
+/* NFSD_CMD_LISTENER_SET - do */
+static const struct nla_policy nfsd_listener_set_nl_policy[NFSD_A_SERVER_SOCK_ADDR + 1] = {
+ [NFSD_A_SERVER_SOCK_ADDR] = NLA_POLICY_NESTED(nfsd_sock_nl_policy),
+};
+
/* Ops table for nfsd */
static const struct genl_split_ops nfsd_nl_ops[] = {
{
@@ -19,6 +49,42 @@ static const struct genl_split_ops nfsd_nl_ops[] = {
.done = nfsd_nl_rpc_status_get_done,
.flags = GENL_CMD_CAP_DUMP,
},
+ {
+ .cmd = NFSD_CMD_THREADS_SET,
+ .doit = nfsd_nl_threads_set_doit,
+ .policy = nfsd_threads_set_nl_policy,
+ .maxattr = NFSD_A_SERVER_SCOPE,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_THREADS_GET,
+ .doit = nfsd_nl_threads_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_VERSION_SET,
+ .doit = nfsd_nl_version_set_doit,
+ .policy = nfsd_version_set_nl_policy,
+ .maxattr = NFSD_A_SERVER_PROTO_VERSION,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_VERSION_GET,
+ .doit = nfsd_nl_version_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_LISTENER_SET,
+ .doit = nfsd_nl_listener_set_doit,
+ .policy = nfsd_listener_set_nl_policy,
+ .maxattr = NFSD_A_SERVER_SOCK_ADDR,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NFSD_CMD_LISTENER_GET,
+ .doit = nfsd_nl_listener_get_doit,
+ .flags = GENL_CMD_CAP_DO,
+ },
};
struct genl_family nfsd_nl_family __ro_after_init = {
diff --git a/fs/nfsd/netlink.h b/fs/nfsd/netlink.h
index d83dd6bdee92..e3724637d64d 100644
--- a/fs/nfsd/netlink.h
+++ b/fs/nfsd/netlink.h
@@ -11,11 +11,21 @@
#include <uapi/linux/nfsd_netlink.h>
+/* Common nested types */
+extern const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1];
+extern const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1];
+
int nfsd_nl_rpc_status_get_start(struct netlink_callback *cb);
int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb);
int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
+int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info);
+int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info);
extern struct genl_family nfsd_nl_family;
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index d4be519b5734..14ec15656320 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -218,6 +218,7 @@ struct nfsd_net {
/* Simple check to find out if a given net was properly initialized */
#define nfsd_netns_ready(nn) ((nn)->sessionid_hashtbl)
+extern bool nfsd_support_version(int vers);
extern void nfsd_netns_free_versions(struct nfsd_net *nn);
extern unsigned int nfsd_net_id;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index e88aca0c6e8e..d756f443fc44 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -978,12 +978,12 @@ static int max_cb_time(struct net *net)
return max(((u32)nn->nfsd4_lease)/10, 1u) * HZ;
}
-static struct workqueue_struct *callback_wq;
-
static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
{
- trace_nfsd_cb_queue(cb->cb_clp, cb);
- return queue_work(callback_wq, &cb->cb_work);
+ struct nfs4_client *clp = cb->cb_clp;
+
+ trace_nfsd_cb_queue(clp, cb);
+ return queue_work(clp->cl_callback_wq, &cb->cb_work);
}
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
@@ -1153,7 +1153,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp)
void nfsd4_probe_callback_sync(struct nfs4_client *clp)
{
nfsd4_probe_callback(clp);
- flush_workqueue(callback_wq);
+ flush_workqueue(clp->cl_callback_wq);
}
void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
@@ -1372,19 +1372,6 @@ static const struct rpc_call_ops nfsd4_cb_ops = {
.rpc_release = nfsd4_cb_release,
};
-int nfsd4_create_callback_queue(void)
-{
- callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0);
- if (!callback_wq)
- return -ENOMEM;
- return 0;
-}
-
-void nfsd4_destroy_callback_queue(void)
-{
- destroy_workqueue(callback_wq);
-}
-
/* must be called under the state lock */
void nfsd4_shutdown_callback(struct nfs4_client *clp)
{
@@ -1398,7 +1385,7 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
* client, destroy the rpc client, and stop:
*/
nfsd4_run_cb(&clp->cl_cb_null);
- flush_workqueue(callback_wq);
+ flush_workqueue(clp->cl_callback_wq);
nfsd41_cb_inflight_wait_complete(clp);
}
@@ -1420,9 +1407,9 @@ static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
/*
* Note there isn't a lot of locking in this code; instead we depend on
- * the fact that it is run from the callback_wq, which won't run two
- * work items at once. So, for example, callback_wq handles all access
- * of cl_cb_client and all calls to rpc_create or rpc_shutdown_client.
+ * the fact that it is run from clp->cl_callback_wq, which won't run two
+ * work items at once. So, for example, clp->cl_callback_wq handles all
+ * access of cl_cb_client and all calls to rpc_create or rpc_shutdown_client.
*/
static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
{
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2927b1263f08..46bd20fe5c0f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1737,7 +1737,7 @@ static void cleanup_async_copy(struct nfsd4_copy *copy)
nfs4_put_copy(copy);
}
-static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr)
+static void nfsd4_send_cb_offload(struct nfsd4_copy *copy)
{
struct nfsd4_cb_offload *cbo;
@@ -1747,12 +1747,12 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr)
memcpy(&cbo->co_res, &copy->cp_res, sizeof(copy->cp_res));
memcpy(&cbo->co_fh, &copy->fh, sizeof(copy->fh));
- cbo->co_nfserr = nfserr;
+ cbo->co_nfserr = copy->nfserr;
nfsd4_init_cb(&cbo->co_cb, copy->cp_clp, &nfsd4_cb_offload_ops,
NFSPROC4_CLNT_CB_OFFLOAD);
trace_nfsd_cb_offload(copy->cp_clp, &cbo->co_res.cb_stateid,
- &cbo->co_fh, copy->cp_count, nfserr);
+ &cbo->co_fh, copy->cp_count, copy->nfserr);
nfsd4_run_cb(&cbo->co_cb);
}
@@ -1766,7 +1766,6 @@ static void nfsd4_send_cb_offload(struct nfsd4_copy *copy, __be32 nfserr)
static int nfsd4_do_async_copy(void *data)
{
struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
- __be32 nfserr;
trace_nfsd_copy_do_async(copy);
if (nfsd4_ssc_is_inter(copy)) {
@@ -1777,24 +1776,25 @@ static int nfsd4_do_async_copy(void *data)
if (IS_ERR(filp)) {
switch (PTR_ERR(filp)) {
case -EBADF:
- nfserr = nfserr_wrong_type;
+ copy->nfserr = nfserr_wrong_type;
break;
default:
- nfserr = nfserr_offload_denied;
+ copy->nfserr = nfserr_offload_denied;
}
/* ss_mnt will be unmounted by the laundromat */
goto do_callback;
}
- nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file,
- false);
+ copy->nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file,
+ false);
nfsd4_cleanup_inter_ssc(copy->ss_nsui, filp, copy->nf_dst);
} else {
- nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
- copy->nf_dst->nf_file, false);
+ copy->nfserr = nfsd4_do_copy(copy, copy->nf_src->nf_file,
+ copy->nf_dst->nf_file, false);
}
do_callback:
- nfsd4_send_cb_offload(copy, nfserr);
+ set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
+ nfsd4_send_cb_offload(copy);
cleanup_async_copy(copy);
return 0;
}
@@ -1807,6 +1807,13 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct nfsd4_copy *async_copy = NULL;
+ /*
+ * Currently, async COPY is not reliable. Force all COPY
+ * requests to be synchronous to avoid client application
+ * hangs waiting for COPY completion.
+ */
+ nfsd4_copy_set_sync(copy, true);
+
copy->cp_clp = cstate->clp;
if (nfsd4_ssc_is_inter(copy)) {
trace_nfsd_copy_inter(copy);
@@ -2003,11 +2010,16 @@ nfsd4_offload_status(struct svc_rqst *rqstp,
struct nfsd4_copy *copy;
struct nfs4_client *clp = cstate->clp;
+ os->completed = false;
spin_lock(&clp->async_lock);
copy = find_async_copy_locked(clp, &os->stateid);
- if (copy)
+ if (copy) {
os->count = copy->cp_res.wr_bytes_written;
- else
+ if (test_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags)) {
+ os->completed = true;
+ os->status = copy->nfserr;
+ }
+ } else
status = nfserr_bad_stateid;
spin_unlock(&clp->async_lock);
@@ -2154,6 +2166,29 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status == nfserr_same ? nfs_ok : status;
}
+static __be32
+nfsd4_get_dir_delegation(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *cstate,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+
+ /*
+ * RFC 8881, section 18.39.3 says:
+ *
+ * "The server may refuse to grant the delegation. In that case, the
+ * server will return NFS4ERR_DIRDELEG_UNAVAIL."
+ *
+ * This is sub-optimal, since it means that the server would need to
+ * abort compound processing just because the delegation wasn't
+ * available. RFC8881bis should change this to allow the server to
+ * return NFS4_OK with a non-fatal status of GDD4_UNAVAIL in this
+ * situation.
+ */
+ gdd->gddrnf_status = GDD4_UNAVAIL;
+ return nfs_ok;
+}
+
#ifdef CONFIG_NFSD_PNFS
static const struct nfsd4_layout_ops *
nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
@@ -3082,6 +3117,18 @@ static u32 nfsd4_copy_notify_rsize(const struct svc_rqst *rqstp,
* sizeof(__be32);
}
+static u32 nfsd4_get_dir_delegation_rsize(const struct svc_rqst *rqstp,
+ const struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size +
+ 1 /* gddr_status */ +
+ op_encode_verifier_maxsz +
+ op_encode_stateid_maxsz +
+ 2 /* gddr_notification */ +
+ 2 /* gddr_child_attributes */ +
+ 2 /* gddr_dir_attributes */);
+}
+
#ifdef CONFIG_NFSD_PNFS
static u32 nfsd4_getdeviceinfo_rsize(const struct svc_rqst *rqstp,
const struct nfsd4_op *op)
@@ -3470,6 +3517,12 @@ static const struct nfsd4_operation nfsd4_ops[] = {
.op_get_currentstateid = nfsd4_get_freestateid,
.op_rsize_bop = nfsd4_only_status_rsize,
},
+ [OP_GET_DIR_DELEGATION] = {
+ .op_func = nfsd4_get_dir_delegation,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_GET_DIR_DELEGATION",
+ .op_rsize_bop = nfsd4_get_dir_delegation_rsize,
+ },
#ifdef CONFIG_NFSD_PNFS
[OP_GETDEVICEINFO] = {
.op_func = nfsd4_getdeviceinfo,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 84d4093ca713..a20c2c9d7d45 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -541,7 +541,7 @@ same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner)
}
static struct nfs4_openowner *
-find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
+find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
struct nfs4_client *clp)
{
struct nfs4_stateowner *so;
@@ -558,18 +558,6 @@ find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
return NULL;
}
-static struct nfs4_openowner *
-find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
- struct nfs4_client *clp)
-{
- struct nfs4_openowner *oo;
-
- spin_lock(&clp->cl_lock);
- oo = find_openstateowner_str_locked(hashval, open, clp);
- spin_unlock(&clp->cl_lock);
- return oo;
-}
-
static inline u32
opaque_hashval(const void *ptr, int nbytes)
{
@@ -1409,11 +1397,16 @@ static void
recalculate_deny_mode(struct nfs4_file *fp)
{
struct nfs4_ol_stateid *stp;
+ u32 old_deny;
spin_lock(&fp->fi_lock);
+ old_deny = fp->fi_share_deny;
fp->fi_share_deny = 0;
- list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
+ list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
+ if (fp->fi_share_deny == old_deny)
+ break;
+ }
spin_unlock(&fp->fi_lock);
}
@@ -2245,6 +2238,10 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name,
GFP_KERNEL);
if (!clp->cl_ownerstr_hashtbl)
goto err_no_hashtbl;
+ clp->cl_callback_wq = alloc_ordered_workqueue("nfsd4_callbacks", 0);
+ if (!clp->cl_callback_wq)
+ goto err_no_callback_wq;
+
for (i = 0; i < OWNER_HASH_SIZE; i++)
INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
INIT_LIST_HEAD(&clp->cl_sessions);
@@ -2267,6 +2264,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name,
spin_lock_init(&clp->cl_lock);
rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
return clp;
+err_no_callback_wq:
+ kfree(clp->cl_ownerstr_hashtbl);
err_no_hashtbl:
kfree(clp->cl_name.data);
err_no_name:
@@ -2280,6 +2279,7 @@ static void __free_client(struct kref *k)
struct nfs4_client *clp = container_of(c, struct nfs4_client, cl_nfsdfs);
free_svc_cred(&clp->cl_cred);
+ destroy_workqueue(clp->cl_callback_wq);
kfree(clp->cl_ownerstr_hashtbl);
kfree(clp->cl_name.data);
kfree(clp->cl_nii_domain.data);
@@ -2352,7 +2352,11 @@ unhash_client(struct nfs4_client *clp)
static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
- if (atomic_read(&clp->cl_rpc_users))
+ int users = atomic_read(&clp->cl_rpc_users);
+
+ trace_nfsd_mark_client_expired(clp, users);
+
+ if (users)
return nfserr_jukebox;
unhash_client_locked(clp);
return nfs_ok;
@@ -3641,12 +3645,8 @@ out_nolock:
return status;
}
-static __be32
-check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
{
- dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
- slot_seqid);
-
/* The slot is in use, and no response has been sent. */
if (slot_inuse) {
if (seqid == slot_seqid)
@@ -3823,10 +3823,13 @@ nfsd4_create_session(struct svc_rqst *rqstp,
}
/* RFC 8881 Section 18.36.4 Phase 2: Sequence ID processing. */
- if (conf)
+ if (conf) {
cs_slot = &conf->cl_cs_slot;
- else
+ trace_nfsd_slot_seqid_conf(conf, cr_ses);
+ } else {
cs_slot = &unconf->cl_cs_slot;
+ trace_nfsd_slot_seqid_unconf(unconf, cr_ses);
+ }
status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
switch (status) {
case nfs_ok:
@@ -4221,6 +4224,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
* sr_highest_slotid and the sr_target_slot id to maxslots */
seq->maxslots = session->se_fchannel.maxreqs;
+ trace_nfsd_slot_seqid_sequence(clp, seq, slot);
status = check_slot_seqid(seq->seqid, slot->sl_seqid,
slot->sl_flags & NFSD4_SLOT_INUSE);
if (status == nfserr_replay_cache) {
@@ -4662,21 +4666,32 @@ nfsd4_init_leases_net(struct nfsd_net *nn)
atomic_set(&nn->nfsd_courtesy_clients, 0);
}
+enum rp_lock {
+ RP_UNLOCKED,
+ RP_LOCKED,
+ RP_UNHASHED,
+};
+
static void init_nfs4_replay(struct nfs4_replay *rp)
{
rp->rp_status = nfserr_serverfault;
rp->rp_buflen = 0;
rp->rp_buf = rp->rp_ibuf;
- mutex_init(&rp->rp_mutex);
+ atomic_set(&rp->rp_locked, RP_UNLOCKED);
}
-static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
- struct nfs4_stateowner *so)
+static int nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
+ struct nfs4_stateowner *so)
{
if (!nfsd4_has_session(cstate)) {
- mutex_lock(&so->so_replay.rp_mutex);
+ wait_var_event(&so->so_replay.rp_locked,
+ atomic_cmpxchg(&so->so_replay.rp_locked,
+ RP_UNLOCKED, RP_LOCKED) != RP_LOCKED);
+ if (atomic_read(&so->so_replay.rp_locked) == RP_UNHASHED)
+ return -EAGAIN;
cstate->replay_owner = nfs4_get_stateowner(so);
}
+ return 0;
}
void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
@@ -4685,7 +4700,8 @@ void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
if (so != NULL) {
cstate->replay_owner = NULL;
- mutex_unlock(&so->so_replay.rp_mutex);
+ atomic_set(&so->so_replay.rp_locked, RP_UNLOCKED);
+ wake_up_var(&so->so_replay.rp_locked);
nfs4_put_stateowner(so);
}
}
@@ -4866,34 +4882,46 @@ nfsd4_find_and_lock_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
}
static struct nfs4_openowner *
-alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
- struct nfsd4_compound_state *cstate)
+find_or_alloc_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
+ struct nfsd4_compound_state *cstate)
{
struct nfs4_client *clp = cstate->clp;
- struct nfs4_openowner *oo, *ret;
+ struct nfs4_openowner *oo, *new = NULL;
- oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
- if (!oo)
- return NULL;
- oo->oo_owner.so_ops = &openowner_ops;
- oo->oo_owner.so_is_open_owner = 1;
- oo->oo_owner.so_seqid = open->op_seqid;
- oo->oo_flags = 0;
- if (nfsd4_has_session(cstate))
- oo->oo_flags |= NFS4_OO_CONFIRMED;
- oo->oo_time = 0;
- oo->oo_last_closed_stid = NULL;
- INIT_LIST_HEAD(&oo->oo_close_lru);
+retry:
spin_lock(&clp->cl_lock);
- ret = find_openstateowner_str_locked(strhashval, open, clp);
- if (ret == NULL) {
- hash_openowner(oo, clp, strhashval);
- ret = oo;
- } else
- nfs4_free_stateowner(&oo->oo_owner);
-
+ oo = find_openstateowner_str(strhashval, open, clp);
+ if (!oo && new) {
+ hash_openowner(new, clp, strhashval);
+ spin_unlock(&clp->cl_lock);
+ return new;
+ }
spin_unlock(&clp->cl_lock);
- return ret;
+
+ if (oo && !(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+ /* Replace unconfirmed owners without checking for replay. */
+ release_openowner(oo);
+ oo = NULL;
+ }
+ if (oo) {
+ if (new)
+ nfs4_free_stateowner(&new->oo_owner);
+ return oo;
+ }
+
+ new = alloc_stateowner(openowner_slab, &open->op_owner, clp);
+ if (!new)
+ return NULL;
+ new->oo_owner.so_ops = &openowner_ops;
+ new->oo_owner.so_is_open_owner = 1;
+ new->oo_owner.so_seqid = open->op_seqid;
+ new->oo_flags = 0;
+ if (nfsd4_has_session(cstate))
+ new->oo_flags |= NFS4_OO_CONFIRMED;
+ new->oo_time = 0;
+ new->oo_last_closed_stid = NULL;
+ INIT_LIST_HEAD(&new->oo_close_lru);
+ goto retry;
}
static struct nfs4_ol_stateid *
@@ -4969,7 +4997,11 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
* Wait for the refcount to drop to 2. Since it has been unhashed,
* there should be no danger of the refcount going back up again at
* this point.
+ * Some threads with a reference might be waiting for rp_locked,
+ * so tell them to stop waiting.
*/
+ atomic_set(&oo->oo_owner.so_replay.rp_locked, RP_UNHASHED);
+ wake_up_var(&oo->oo_owner.so_replay.rp_locked);
wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
release_all_access(s);
@@ -5342,27 +5374,19 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
clp = cstate->clp;
strhashval = ownerstr_hashval(&open->op_owner);
- oo = find_openstateowner_str(strhashval, open, clp);
+retry:
+ oo = find_or_alloc_open_stateowner(strhashval, open, cstate);
open->op_openowner = oo;
- if (!oo) {
- goto new_owner;
- }
- if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
- /* Replace unconfirmed owners without checking for replay. */
- release_openowner(oo);
- open->op_openowner = NULL;
- goto new_owner;
+ if (!oo)
+ return nfserr_jukebox;
+ if (nfsd4_cstate_assign_replay(cstate, &oo->oo_owner) == -EAGAIN) {
+ nfs4_put_stateowner(&oo->oo_owner);
+ goto retry;
}
status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
if (status)
return status;
- goto alloc_stateid;
-new_owner:
- oo = alloc_init_open_stateowner(strhashval, open, cstate);
- if (oo == NULL)
- return nfserr_jukebox;
- open->op_openowner = oo;
-alloc_stateid:
+
open->op_stp = nfs4_alloc_open_stateid(clp);
if (!open->op_stp)
return nfserr_jukebox;
@@ -6133,12 +6157,8 @@ out:
void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
struct nfsd4_open *open)
{
- if (open->op_openowner) {
- struct nfs4_stateowner *so = &open->op_openowner->oo_owner;
-
- nfsd4_cstate_assign_replay(cstate, so);
- nfs4_put_stateowner(so);
- }
+ if (open->op_openowner)
+ nfs4_put_stateowner(&open->op_openowner->oo_owner);
if (open->op_file)
kmem_cache_free(file_slab, open->op_file);
if (open->op_stp)
@@ -7202,12 +7222,16 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
trace_nfsd_preprocess(seqid, stateid);
*stpp = NULL;
+retry:
status = nfsd4_lookup_stateid(cstate, stateid,
typemask, statusmask, &s, nn);
if (status)
return status;
stp = openlockstateid(s);
- nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
+ if (nfsd4_cstate_assign_replay(cstate, stp->st_stateowner) == -EAGAIN) {
+ nfs4_put_stateowner(stp->st_stateowner);
+ goto retry;
+ }
status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
if (!status)
@@ -7349,7 +7373,7 @@ out:
return status;
}
-static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
+static bool nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
{
struct nfs4_client *clp = s->st_stid.sc_client;
bool unhashed;
@@ -7366,11 +7390,11 @@ static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
list_for_each_entry(stp, &reaplist, st_locks)
nfs4_free_cpntf_statelist(clp->net, &stp->st_stid);
free_ol_stateid_reaplist(&reaplist);
+ return false;
} else {
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
- if (unhashed)
- move_to_close_lru(s, clp->net);
+ return unhashed;
}
}
@@ -7386,6 +7410,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *stp;
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ bool need_move_to_close_list;
dprintk("NFSD: nfsd4_close on file %pd\n",
cstate->current_fh.fh_dentry);
@@ -7410,8 +7435,10 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
*/
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- nfsd4_close_open_stateid(stp);
+ need_move_to_close_list = nfsd4_close_open_stateid(stp);
mutex_unlock(&stp->st_mutex);
+ if (need_move_to_close_list)
+ move_to_close_lru(stp, net);
/* v4.1+ suggests that we send a special stateid in here, since the
* clients should just ignore this anyway. Since this is not useful
@@ -8625,12 +8652,6 @@ nfs4_state_start(void)
if (ret)
return ret;
- ret = nfsd4_create_callback_queue();
- if (ret) {
- rhltable_destroy(&nfs4_file_rhltable);
- return ret;
- }
-
set_max_delegations();
return 0;
}
@@ -8671,7 +8692,6 @@ nfs4_state_shutdown_net(struct net *net)
void
nfs4_state_shutdown(void)
{
- nfsd4_destroy_callback_queue();
rhltable_destroy(&nfs4_file_rhltable);
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index a644460f3a5e..c7bfd2180e3f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1732,6 +1732,35 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid);
}
+static __be32
+nfsd4_decode_get_dir_delegation(struct nfsd4_compoundargs *argp,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+ __be32 status;
+
+ memset(gdd, 0, sizeof(*gdd));
+
+ if (xdr_stream_decode_bool(argp->xdr, &gdd->gdda_signal_deleg_avail) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_bitmap4(argp, gdd->gdda_notification_types,
+ ARRAY_SIZE(gdd->gdda_notification_types));
+ if (status)
+ return status;
+ status = nfsd4_decode_nfstime4(argp, &gdd->gdda_child_attr_delay);
+ if (status)
+ return status;
+ status = nfsd4_decode_nfstime4(argp, &gdd->gdda_dir_attr_delay);
+ if (status)
+ return status;
+ status = nfsd4_decode_bitmap4(argp, gdd->gdda_child_attributes,
+ ARRAY_SIZE(gdd->gdda_child_attributes));
+ if (status)
+ return status;
+ return nfsd4_decode_bitmap4(argp, gdd->gdda_dir_attributes,
+ ARRAY_SIZE(gdd->gdda_dir_attributes));
+}
+
#ifdef CONFIG_NFSD_PNFS
static __be32
nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
@@ -2370,7 +2399,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
[OP_CREATE_SESSION] = nfsd4_decode_create_session,
[OP_DESTROY_SESSION] = nfsd4_decode_destroy_session,
[OP_FREE_STATEID] = nfsd4_decode_free_stateid,
- [OP_GET_DIR_DELEGATION] = nfsd4_decode_notsupp,
+ [OP_GET_DIR_DELEGATION] = nfsd4_decode_get_dir_delegation,
#ifdef CONFIG_NFSD_PNFS
[OP_GETDEVICEINFO] = nfsd4_decode_getdeviceinfo,
[OP_GETDEVICELIST] = nfsd4_decode_notsupp,
@@ -4963,6 +4992,49 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfs_ok;
}
+static __be32
+nfsd4_encode_get_dir_delegation(struct nfsd4_compoundres *resp, __be32 nfserr,
+ union nfsd4_op_u *u)
+{
+ struct nfsd4_get_dir_delegation *gdd = &u->get_dir_delegation;
+ struct xdr_stream *xdr = resp->xdr;
+ __be32 status = nfserr_resource;
+
+ switch(gdd->gddrnf_status) {
+ case GDD4_OK:
+ if (xdr_stream_encode_u32(xdr, GDD4_OK) != XDR_UNIT)
+ break;
+ status = nfsd4_encode_verifier4(xdr, &gdd->gddr_cookieverf);
+ if (status)
+ break;
+ status = nfsd4_encode_stateid4(xdr, &gdd->gddr_stateid);
+ if (status)
+ break;
+ status = nfsd4_encode_bitmap4(xdr, gdd->gddr_notification[0], 0, 0);
+ if (status)
+ break;
+ status = nfsd4_encode_bitmap4(xdr, gdd->gddr_child_attributes[0],
+ gdd->gddr_child_attributes[1],
+ gdd->gddr_child_attributes[2]);
+ if (status)
+ break;
+ status = nfsd4_encode_bitmap4(xdr, gdd->gddr_dir_attributes[0],
+ gdd->gddr_dir_attributes[1],
+ gdd->gddr_dir_attributes[2]);
+ break;
+ default:
+ pr_warn("nfsd: bad gddrnf_status (%u)\n", gdd->gddrnf_status);
+ gdd->gddrnf_will_signal_deleg_avail = 0;
+ fallthrough;
+ case GDD4_UNAVAIL:
+ if (xdr_stream_encode_u32(xdr, GDD4_UNAVAIL) != XDR_UNIT)
+ break;
+ status = nfsd4_encode_bool(xdr, gdd->gddrnf_will_signal_deleg_avail);
+ break;
+ }
+ return status;
+}
+
#ifdef CONFIG_NFSD_PNFS
static __be32
nfsd4_encode_device_addr4(struct xdr_stream *xdr,
@@ -5199,7 +5271,12 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
if (nfserr != nfs_ok)
return nfserr;
/* osr_complete<1> */
- if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
+ if (os->completed) {
+ if (xdr_stream_encode_u32(xdr, 1) != XDR_UNIT)
+ return nfserr_resource;
+ if (xdr_stream_encode_be32(xdr, os->status) != XDR_UNIT)
+ return nfserr_resource;
+ } else if (xdr_stream_encode_u32(xdr, 0) != XDR_UNIT)
return nfserr_resource;
return nfs_ok;
}
@@ -5579,7 +5656,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
[OP_CREATE_SESSION] = nfsd4_encode_create_session,
[OP_DESTROY_SESSION] = nfsd4_encode_noop,
[OP_FREE_STATEID] = nfsd4_encode_noop,
- [OP_GET_DIR_DELEGATION] = nfsd4_encode_noop,
+ [OP_GET_DIR_DELEGATION] = nfsd4_encode_get_dir_delegation,
#ifdef CONFIG_NFSD_PNFS
[OP_GETDEVICEINFO] = nfsd4_encode_getdeviceinfo,
[OP_GETDEVICELIST] = nfsd4_encode_noop,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index ecd18bffeebc..202140df8f82 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -15,6 +15,7 @@
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
+#include <linux/sunrpc/svc.h>
#include <linux/module.h>
#include <linux/fsnotify.h>
@@ -48,12 +49,10 @@ enum {
NFSD_MaxBlkSize,
NFSD_MaxConnections,
NFSD_Filecache,
-#ifdef CONFIG_NFSD_V4
NFSD_Leasetime,
NFSD_Gracetime,
NFSD_RecoveryDir,
NFSD_V4EndGrace,
-#endif
NFSD_MaxReserved
};
@@ -406,7 +405,9 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
if (newthreads < 0)
return -EINVAL;
trace_nfsd_ctl_threads(net, newthreads);
- rv = nfsd_svc(newthreads, net, file->f_cred);
+ mutex_lock(&nfsd_mutex);
+ rv = nfsd_svc(newthreads, net, file->f_cred, NULL);
+ mutex_unlock(&nfsd_mutex);
if (rv < 0)
return rv;
} else
@@ -1360,7 +1361,9 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc)
#ifdef CONFIG_NFSD_V4
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
+#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING
[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
+#endif
[NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO},
#endif
/* last one */ {""}
@@ -1652,6 +1655,518 @@ int nfsd_nl_rpc_status_get_done(struct netlink_callback *cb)
}
/**
+ * nfsd_nl_threads_set_doit - set the number of running threads
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_threads_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ int nthreads = 0, count = 0, nrpools, ret = -EOPNOTSUPP, rem;
+ struct net *net = genl_info_net(info);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ const struct nlattr *attr;
+ const char *scope = NULL;
+
+ if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_THREADS))
+ return -EINVAL;
+
+ /* count number of SERVER_THREADS values */
+ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
+ if (nla_type(attr) == NFSD_A_SERVER_THREADS)
+ count++;
+ }
+
+ mutex_lock(&nfsd_mutex);
+
+ nrpools = nfsd_nrpools(net);
+ if (nrpools && count > nrpools)
+ count = nrpools;
+
+ /* XXX: make this handle non-global pool-modes */
+ if (count > 1)
+ goto out_unlock;
+
+ nthreads = nla_get_u32(info->attrs[NFSD_A_SERVER_THREADS]);
+ if (info->attrs[NFSD_A_SERVER_GRACETIME] ||
+ info->attrs[NFSD_A_SERVER_LEASETIME] ||
+ info->attrs[NFSD_A_SERVER_SCOPE]) {
+ ret = -EBUSY;
+ if (nn->nfsd_serv && nn->nfsd_serv->sv_nrthreads)
+ goto out_unlock;
+
+ ret = -EINVAL;
+ attr = info->attrs[NFSD_A_SERVER_GRACETIME];
+ if (attr) {
+ u32 gracetime = nla_get_u32(attr);
+
+ if (gracetime < 10 || gracetime > 3600)
+ goto out_unlock;
+
+ nn->nfsd4_grace = gracetime;
+ }
+
+ attr = info->attrs[NFSD_A_SERVER_LEASETIME];
+ if (attr) {
+ u32 leasetime = nla_get_u32(attr);
+
+ if (leasetime < 10 || leasetime > 3600)
+ goto out_unlock;
+
+ nn->nfsd4_lease = leasetime;
+ }
+
+ attr = info->attrs[NFSD_A_SERVER_SCOPE];
+ if (attr)
+ scope = nla_data(attr);
+ }
+
+ ret = nfsd_svc(nthreads, net, get_current_cred(), scope);
+
+out_unlock:
+ mutex_unlock(&nfsd_mutex);
+
+ return ret == nthreads ? 0 : ret;
+}
+
+/**
+ * nfsd_nl_threads_get_doit - get the number of running threads
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_threads_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ void *hdr;
+ int err;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_free_msg;
+ }
+
+ mutex_lock(&nfsd_mutex);
+
+ err = nla_put_u32(skb, NFSD_A_SERVER_GRACETIME,
+ nn->nfsd4_grace) ||
+ nla_put_u32(skb, NFSD_A_SERVER_LEASETIME,
+ nn->nfsd4_lease) ||
+ nla_put_string(skb, NFSD_A_SERVER_SCOPE,
+ nn->nfsd_name);
+ if (err)
+ goto err_unlock;
+
+ if (nn->nfsd_serv) {
+ int i;
+
+ for (i = 0; i < nfsd_nrpools(net); ++i) {
+ struct svc_pool *sp = &nn->nfsd_serv->sv_pools[i];
+
+ err = nla_put_u32(skb, NFSD_A_SERVER_THREADS,
+ atomic_read(&sp->sp_nrthreads));
+ if (err)
+ goto err_unlock;
+ }
+ } else {
+ err = nla_put_u32(skb, NFSD_A_SERVER_THREADS, 0);
+ if (err)
+ goto err_unlock;
+ }
+
+ mutex_unlock(&nfsd_mutex);
+
+ genlmsg_end(skb, hdr);
+
+ return genlmsg_reply(skb, info);
+
+err_unlock:
+ mutex_unlock(&nfsd_mutex);
+err_free_msg:
+ nlmsg_free(skb);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_version_set_doit - set the nfs enabled versions
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_version_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ const struct nlattr *attr;
+ struct nfsd_net *nn;
+ int i, rem;
+
+ if (GENL_REQ_ATTR_CHECK(info, NFSD_A_SERVER_PROTO_VERSION))
+ return -EINVAL;
+
+ mutex_lock(&nfsd_mutex);
+
+ nn = net_generic(genl_info_net(info), nfsd_net_id);
+ if (nn->nfsd_serv) {
+ mutex_unlock(&nfsd_mutex);
+ return -EBUSY;
+ }
+
+ /* clear current supported versions. */
+ nfsd_vers(nn, 2, NFSD_CLEAR);
+ nfsd_vers(nn, 3, NFSD_CLEAR);
+ for (i = 0; i <= NFSD_SUPPORTED_MINOR_VERSION; i++)
+ nfsd_minorversion(nn, i, NFSD_CLEAR);
+
+ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
+ struct nlattr *tb[NFSD_A_VERSION_MAX + 1];
+ u32 major, minor = 0;
+ bool enabled;
+
+ if (nla_type(attr) != NFSD_A_SERVER_PROTO_VERSION)
+ continue;
+
+ if (nla_parse_nested(tb, NFSD_A_VERSION_MAX, attr,
+ nfsd_version_nl_policy, info->extack) < 0)
+ continue;
+
+ if (!tb[NFSD_A_VERSION_MAJOR])
+ continue;
+
+ major = nla_get_u32(tb[NFSD_A_VERSION_MAJOR]);
+ if (tb[NFSD_A_VERSION_MINOR])
+ minor = nla_get_u32(tb[NFSD_A_VERSION_MINOR]);
+
+ enabled = nla_get_flag(tb[NFSD_A_VERSION_ENABLED]);
+
+ switch (major) {
+ case 4:
+ nfsd_minorversion(nn, minor, enabled ? NFSD_SET : NFSD_CLEAR);
+ break;
+ case 3:
+ case 2:
+ if (!minor)
+ nfsd_vers(nn, major, enabled ? NFSD_SET : NFSD_CLEAR);
+ break;
+ default:
+ break;
+ }
+ }
+
+ mutex_unlock(&nfsd_mutex);
+
+ return 0;
+}
+
+/**
+ * nfsd_nl_version_get_doit - get the enabled status for all supported nfs versions
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_version_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nfsd_net *nn;
+ int i, err;
+ void *hdr;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto err_free_msg;
+ }
+
+ mutex_lock(&nfsd_mutex);
+ nn = net_generic(genl_info_net(info), nfsd_net_id);
+
+ for (i = 2; i <= 4; i++) {
+ int j;
+
+ for (j = 0; j <= NFSD_SUPPORTED_MINOR_VERSION; j++) {
+ struct nlattr *attr;
+
+ /* Don't record any versions the kernel doesn't have
+ * compiled in
+ */
+ if (!nfsd_support_version(i))
+ continue;
+
+ /* NFSv{2,3} does not support minor numbers */
+ if (i < 4 && j)
+ continue;
+
+ attr = nla_nest_start(skb,
+ NFSD_A_SERVER_PROTO_VERSION);
+ if (!attr) {
+ err = -EINVAL;
+ goto err_nfsd_unlock;
+ }
+
+ if (nla_put_u32(skb, NFSD_A_VERSION_MAJOR, i) ||
+ nla_put_u32(skb, NFSD_A_VERSION_MINOR, j)) {
+ err = -EINVAL;
+ goto err_nfsd_unlock;
+ }
+
+ /* Set the enabled flag if the version is enabled */
+ if (nfsd_vers(nn, i, NFSD_TEST) &&
+ (i < 4 || nfsd_minorversion(nn, j, NFSD_TEST)) &&
+ nla_put_flag(skb, NFSD_A_VERSION_ENABLED)) {
+ err = -EINVAL;
+ goto err_nfsd_unlock;
+ }
+
+ nla_nest_end(skb, attr);
+ }
+ }
+
+ mutex_unlock(&nfsd_mutex);
+ genlmsg_end(skb, hdr);
+
+ return genlmsg_reply(skb, info);
+
+err_nfsd_unlock:
+ mutex_unlock(&nfsd_mutex);
+err_free_msg:
+ nlmsg_free(skb);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_listener_set_doit - set the nfs running sockets
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Reconciles the server's permanent listener list (sv_permsocks) with the
+ * NFSD_A_SERVER_SOCK_ADDR attributes carried in the request:
+ *
+ * 1. existing listeners that match a requested transport name and
+ *    address are kept;
+ * 2. if server threads are running and some existing listener was not
+ *    requested, the old list is restored and -EBUSY is returned before
+ *    anything is closed or created;
+ * 3. otherwise the listeners that were not requested are closed;
+ * 4. requested listeners that do not exist yet are created.
+ *
+ * Attributes that fail to parse, that lack an address or a transport
+ * name, or whose address is shorter than a struct sockaddr are skipped
+ * rather than reported. When several listeners fail to be created only
+ * the last error is returned. nfsd_create_serv() is called on entry, and
+ * the server is destroyed again if the request leaves it with no threads
+ * and an empty sv_permsocks list.
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = genl_info_net(info);
+ struct svc_xprt *xprt, *tmp;
+ const struct nlattr *attr;
+ struct svc_serv *serv;
+ LIST_HEAD(permsocks);
+ struct nfsd_net *nn;
+ int err, rem;
+
+ mutex_lock(&nfsd_mutex);
+
+ err = nfsd_create_serv(net);
+ if (err) {
+ mutex_unlock(&nfsd_mutex);
+ return err;
+ }
+
+ nn = net_generic(net, nfsd_net_id);
+ serv = nn->nfsd_serv;
+
+ spin_lock_bh(&serv->sv_lock);
+
+ /* Move all of the old listener sockets to a temp list */
+ list_splice_init(&serv->sv_permsocks, &permsocks);
+
+ /*
+ * Walk the list of server_socks from userland and move any that match
+ * back to sv_permsocks
+ */
+ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
+ struct nlattr *tb[NFSD_A_SOCK_MAX + 1];
+ const char *xcl_name;
+ struct sockaddr *sa;
+
+ if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR)
+ continue;
+
+ if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr,
+ nfsd_sock_nl_policy, info->extack) < 0)
+ continue;
+
+ if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME])
+ continue;
+
+ if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa))
+ continue;
+
+ xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]);
+ sa = nla_data(tb[NFSD_A_SOCK_ADDR]);
+
+ /* Put back any matching sockets */
+ list_for_each_entry_safe(xprt, tmp, &permsocks, xpt_list) {
+ /* This shouldn't be possible */
+ if (WARN_ON_ONCE(xprt->xpt_net != net)) {
+ list_move(&xprt->xpt_list, &serv->sv_permsocks);
+ continue;
+ }
+
+ /* If everything matches, put it back */
+ if (!strcmp(xprt->xpt_class->xcl_name, xcl_name) &&
+ rpc_cmp_addr_port(sa, (struct sockaddr *)&xprt->xpt_local)) {
+ list_move(&xprt->xpt_list, &serv->sv_permsocks);
+ break;
+ }
+ }
+ }
+
+ /* For now, no removing old sockets while server is running */
+ if (serv->sv_nrthreads && !list_empty(&permsocks)) {
+ list_splice_init(&permsocks, &serv->sv_permsocks);
+ spin_unlock_bh(&serv->sv_lock);
+ err = -EBUSY;
+ goto out_unlock_mtx;
+ }
+
+ /* Close the remaining sockets on the permsocks list */
+ while (!list_empty(&permsocks)) {
+ xprt = list_first_entry(&permsocks, struct svc_xprt, xpt_list);
+ list_move(&xprt->xpt_list, &serv->sv_permsocks);
+
+ /*
+ * Newly-created sockets are born with the BUSY bit set. Clear
+ * it if there are no threads, since nothing can pick it up
+ * in that case.
+ */
+ if (!serv->sv_nrthreads)
+ clear_bit(XPT_BUSY, &xprt->xpt_flags);
+
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+ /* sv_lock is dropped around the close and retaken afterwards */
+ spin_unlock_bh(&serv->sv_lock);
+ svc_xprt_close(xprt);
+ spin_lock_bh(&serv->sv_lock);
+ }
+
+ spin_unlock_bh(&serv->sv_lock);
+
+ /* walk list of addrs again, open any that still don't exist */
+ nlmsg_for_each_attr(attr, info->nlhdr, GENL_HDRLEN, rem) {
+ struct nlattr *tb[NFSD_A_SOCK_MAX + 1];
+ const char *xcl_name;
+ struct sockaddr *sa;
+ int ret;
+
+ if (nla_type(attr) != NFSD_A_SERVER_SOCK_ADDR)
+ continue;
+
+ if (nla_parse_nested(tb, NFSD_A_SOCK_MAX, attr,
+ nfsd_sock_nl_policy, info->extack) < 0)
+ continue;
+
+ if (!tb[NFSD_A_SOCK_ADDR] || !tb[NFSD_A_SOCK_TRANSPORT_NAME])
+ continue;
+
+ if (nla_len(tb[NFSD_A_SOCK_ADDR]) < sizeof(*sa))
+ continue;
+
+ xcl_name = nla_data(tb[NFSD_A_SOCK_TRANSPORT_NAME]);
+ sa = nla_data(tb[NFSD_A_SOCK_ADDR]);
+
+ /* Already listening there: drop the lookup reference, move on */
+ xprt = svc_find_listener(serv, xcl_name, net, sa);
+ if (xprt) {
+ svc_xprt_put(xprt);
+ continue;
+ }
+
+ /*
+ * Pass the borrowed current_cred(). get_current_cred() would
+ * take an extra reference that nothing on this path ever
+ * drops; the new transport is expected to take its own
+ * reference if it keeps the cred.
+ */
+ ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa,
+ SVC_SOCK_ANONYMOUS,
+ current_cred());
+ /* always save the latest error */
+ if (ret < 0)
+ err = ret;
+ }
+
+ if (!serv->sv_nrthreads && list_empty(&nn->nfsd_serv->sv_permsocks))
+ nfsd_destroy_serv(net);
+
+out_unlock_mtx:
+ mutex_unlock(&nfsd_mutex);
+
+ return err;
+}
+
+/**
+ * nfsd_nl_listener_get_doit - get the nfs running listeners
+ * @skb: reply buffer
+ * @info: netlink metadata and command arguments
+ *
+ * Replies with one NFSD_A_SERVER_SOCK_ADDR nest, holding the transport
+ * name and the local address, for every entry on the server's
+ * sv_permsocks list. When the namespace has no nfsd_serv the reply
+ * carries no listener at all.
+ *
+ * Return 0 on success or a negative errno.
+ */
+int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct svc_xprt *listener;
+ struct svc_serv *serv;
+ struct nfsd_net *nn;
+ void *hdr;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_iput(skb, info);
+ if (!hdr) {
+ nlmsg_free(skb);
+ return -EMSGSIZE;
+ }
+
+ mutex_lock(&nfsd_mutex);
+ nn = net_generic(genl_info_net(info), nfsd_net_id);
+ serv = nn->nfsd_serv;
+
+ /* no nfs server? Just send empty socket list */
+ if (serv) {
+ spin_lock_bh(&serv->sv_lock);
+ list_for_each_entry(listener, &serv->sv_permsocks, xpt_list) {
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, NFSD_A_SERVER_SOCK_ADDR);
+ if (!nest)
+ goto err_serv_unlock;
+
+ if (nla_put_string(skb, NFSD_A_SOCK_TRANSPORT_NAME,
+ listener->xpt_class->xcl_name))
+ goto err_serv_unlock;
+
+ if (nla_put(skb, NFSD_A_SOCK_ADDR,
+ sizeof(struct sockaddr_storage),
+ &listener->xpt_local))
+ goto err_serv_unlock;
+
+ nla_nest_end(skb, nest);
+ }
+ spin_unlock_bh(&serv->sv_lock);
+ }
+
+ mutex_unlock(&nfsd_mutex);
+ genlmsg_end(skb, hdr);
+
+ return genlmsg_reply(skb, info);
+
+err_serv_unlock:
+ /* Every failure while filling in a listener is reported as -EINVAL */
+ spin_unlock_bh(&serv->sv_lock);
+ mutex_unlock(&nfsd_mutex);
+ nlmsg_free(skb);
+
+ return -EINVAL;
+}
+
+/**
* nfsd_net_init - Prepare the nfsd_net portion of a new net namespace
* @net: a freshly-created network namespace
*
@@ -1672,7 +2187,8 @@ static __net_init int nfsd_net_init(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
- retval = nfsd_stat_counters_init(nn);
+ retval = percpu_counter_init_many(nn->counter, 0, GFP_KERNEL,
+ NFSD_STATS_COUNTERS_NUM);
if (retval)
goto out_repcache_error;
memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats));
@@ -1704,7 +2220,7 @@ static __net_exit void nfsd_net_exit(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
nfsd_proc_stat_shutdown(net);
- nfsd_stat_counters_destroy(nn);
+ percpu_counter_destroy_many(nn->counter, NFSD_STATS_COUNTERS_NUM);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(nn);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 16c5a05f340e..8f4f239d9f8a 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -103,7 +103,7 @@ bool nfssvc_encode_voidres(struct svc_rqst *rqstp,
/*
* Function prototypes.
*/
-int nfsd_svc(int nrservs, struct net *net, const struct cred *cred);
+int nfsd_svc(int nrservs, struct net *net, const struct cred *cred, const char *scope);
int nfsd_dispatch(struct svc_rqst *rqstp);
int nfsd_nrthreads(struct net *);
@@ -230,7 +230,6 @@ void nfsd_lockd_shutdown(void);
#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC)
#define nfserr_rofs cpu_to_be32(NFSERR_ROFS)
#define nfserr_mlink cpu_to_be32(NFSERR_MLINK)
-#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP)
#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG)
#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY)
#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 40fecf7b224f..0b75305fb5f5 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -573,7 +573,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
_fh_update(fhp, exp, dentry);
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
fh_put(fhp);
- return nfserr_opnotsupp;
+ return nfserr_stale;
}
return 0;
@@ -599,7 +599,7 @@ fh_update(struct svc_fh *fhp)
_fh_update(fhp, fhp->fh_export, dentry);
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID)
- return nfserr_opnotsupp;
+ return nfserr_stale;
return 0;
out_bad:
printk(KERN_ERR "fh_update: fh not verified!\n");
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index c0d17b92b249..cd9a6a1a9fc8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -133,8 +133,7 @@ struct svc_program nfsd_program = {
.pg_rpcbind_set = nfsd_rpcbind_set,
};
-static bool
-nfsd_support_version(int vers)
+bool nfsd_support_version(int vers)
{
if (vers >= NFSD_MINVERS && vers < NFSD_NRVERS)
return nfsd_version[vers] != NULL;
@@ -769,13 +768,14 @@ int nfsd_set_nrthreads(int n, int *nthreads, struct net *net)
* this is the first time nrservs is nonzero.
*/
int
-nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
+nfsd_svc(int nrservs, struct net *net, const struct cred *cred, const char *scope)
{
int error;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct svc_serv *serv;
- mutex_lock(&nfsd_mutex);
+ lockdep_assert_held(&nfsd_mutex);
+
dprintk("nfsd: creating service\n");
nrservs = max(nrservs, 0);
@@ -785,7 +785,7 @@ nfsd_svc(int nrservs, struct net *net, const struct cred *cred)
if (nrservs == 0 && nn->nfsd_serv == NULL)
goto out;
- strscpy(nn->nfsd_name, utsname()->nodename,
+ strscpy(nn->nfsd_name, scope ? scope : utsname()->nodename,
sizeof(nn->nfsd_name));
error = nfsd_create_serv(net);
@@ -804,7 +804,6 @@ out_put:
if (serv->sv_nrthreads == 0)
nfsd_destroy_serv(net);
out:
- mutex_unlock(&nfsd_mutex);
return error;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 2ed0fcf879fd..ffc217099d19 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -408,6 +408,8 @@ struct nfs4_client {
1 << NFSD4_CLIENT_CB_KILL)
#define NFSD4_CLIENT_CB_RECALL_ANY (6)
unsigned long cl_flags;
+
+ struct workqueue_struct *cl_callback_wq;
const struct cred *cl_cb_cred;
struct rpc_clnt *cl_cb_client;
u32 cl_cb_ident;
@@ -486,7 +488,7 @@ struct nfs4_replay {
unsigned int rp_buflen;
char *rp_buf;
struct knfsd_fh rp_openfh;
- struct mutex rp_mutex;
+ atomic_t rp_locked;
char rp_ibuf[NFSD4_REPLAY_ISIZE];
};
@@ -735,8 +737,6 @@ extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *
extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
const struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
extern bool nfsd4_run_cb(struct nfsd4_callback *cb);
-extern int nfsd4_create_callback_queue(void);
-extern void nfsd4_destroy_callback_queue(void);
extern void nfsd4_shutdown_callback(struct nfs4_client *);
extern void nfsd4_shutdown_copy(struct nfs4_client *clp);
extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name,
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index be52fb1e928e..bb22893f1157 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -73,48 +73,6 @@ static int nfsd_show(struct seq_file *seq, void *v)
DEFINE_PROC_SHOW_ATTRIBUTE(nfsd);
-int nfsd_percpu_counters_init(struct percpu_counter *counters, int num)
-{
- int i, err = 0;
-
- for (i = 0; !err && i < num; i++)
- err = percpu_counter_init(&counters[i], 0, GFP_KERNEL);
-
- if (!err)
- return 0;
-
- for (; i > 0; i--)
- percpu_counter_destroy(&counters[i-1]);
-
- return err;
-}
-
-void nfsd_percpu_counters_reset(struct percpu_counter counters[], int num)
-{
- int i;
-
- for (i = 0; i < num; i++)
- percpu_counter_set(&counters[i], 0);
-}
-
-void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
-{
- int i;
-
- for (i = 0; i < num; i++)
- percpu_counter_destroy(&counters[i]);
-}
-
-int nfsd_stat_counters_init(struct nfsd_net *nn)
-{
- return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM);
-}
-
-void nfsd_stat_counters_destroy(struct nfsd_net *nn)
-{
- nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM);
-}
-
void nfsd_proc_stat_init(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index d2753e975dfd..04aacb6c36e2 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -10,11 +10,6 @@
#include <uapi/linux/nfsd/stats.h>
#include <linux/percpu_counter.h>
-int nfsd_percpu_counters_init(struct percpu_counter *counters, int num);
-void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num);
-void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num);
-int nfsd_stat_counters_init(struct nfsd_net *nn);
-void nfsd_stat_counters_destroy(struct nfsd_net *nn);
void nfsd_proc_stat_init(struct net *net);
void nfsd_proc_stat_shutdown(struct net *net);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 1cd2076210b1..b5e48d504062 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -749,6 +749,76 @@ TRACE_EVENT_CONDITION(nfsd_seq4_status,
)
);
+/*
+ * Event class for tracepoints that take a client and an
+ * nfsd4_create_session. Each event records the client ID (cl_boot:cl_id),
+ * the address held in clp->cl_cb_conn, the seqid from the request
+ * (cs->seqid) and the seqid stored in the client's cl_cs_slot.
+ */
+DECLARE_EVENT_CLASS(nfsd_cs_slot_class,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfsd4_create_session *cs
+ ),
+ TP_ARGS(clp, cs),
+ TP_STRUCT__entry(
+ __field(u32, seqid)
+ __field(u32, slot_seqid)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ const struct nfsd4_clid_slot *slot = &clp->cl_cs_slot;
+
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
+ __entry->seqid = cs->seqid;
+ __entry->slot_seqid = slot->sl_seqid;
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x seqid=%u slot_seqid=%u",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->seqid, __entry->slot_seqid
+ )
+);
+
+/*
+ * Instantiate a tracepoint named nfsd_<name> from nfsd_cs_slot_class; it
+ * takes the same (clp, cs) arguments as the class.
+ */
+#define DEFINE_CS_SLOT_EVENT(name) \
+DEFINE_EVENT(nfsd_cs_slot_class, nfsd_##name, \
+ TP_PROTO( \
+ const struct nfs4_client *clp, \
+ const struct nfsd4_create_session *cs \
+ ), \
+ TP_ARGS(clp, cs))
+
+DEFINE_CS_SLOT_EVENT(slot_seqid_conf);
+DEFINE_CS_SLOT_EVENT(slot_seqid_unconf);
+
+/*
+ * Traces a slot sequence check for a request carrying an nfsd4_sequence.
+ * Records the client ID (cl_boot:cl_id), the address held in
+ * clp->cl_cb_conn, the seqid from the request, the seqid stored in the
+ * slot, and whether the slot is flagged in use.
+ */
+TRACE_EVENT(nfsd_slot_seqid_sequence,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfsd4_sequence *seq,
+ const struct nfsd4_slot *slot
+ ),
+ TP_ARGS(clp, seq, slot),
+ TP_STRUCT__entry(
+ __field(u32, seqid)
+ __field(u32, slot_seqid)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ __field(bool, in_use)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
+ __entry->seqid = seq->seqid;
+ __entry->slot_seqid = slot->sl_seqid;
+ /*
+ * in_use is printed by TP_printk below, so it must be filled
+ * in here; nothing else writes it.
+ */
+ __entry->in_use = !!(slot->sl_flags & NFSD4_SLOT_INUSE);
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x seqid=%u slot_seqid=%u (%sin use)",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->seqid, __entry->slot_seqid,
+ __entry->in_use ? "" : "not "
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_clientid_class,
TP_PROTO(const clientid_t *clid),
TP_ARGS(clid),
@@ -778,6 +848,30 @@ DEFINE_CLIENTID_EVENT(purged);
DEFINE_CLIENTID_EVENT(renew);
DEFINE_CLIENTID_EVENT(stale);
+/*
+ * Traces a client being marked expired. Records the cl_rpc_users value
+ * passed by the caller, the client ID (cl_boot:cl_id) and the address
+ * held in clp->cl_cb_conn.
+ */
+TRACE_EVENT(nfsd_mark_client_expired,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ int cl_rpc_users
+ ),
+ TP_ARGS(clp, cl_rpc_users),
+ TP_STRUCT__entry(
+ __field(int, cl_rpc_users)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_rpc_users = cl_rpc_users;
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen);
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x cl_rpc_users=%d",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->cl_rpc_users)
+);
+
DECLARE_EVENT_CLASS(nfsd_net_class,
TP_PROTO(const struct nfsd_net *nn),
TP_ARGS(nn),
@@ -1534,7 +1628,7 @@ TRACE_EVENT(nfsd_cb_seq_status,
__entry->seq_status = cb->cb_seq_status;
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
- " sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d\n",
+ " sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d",
__entry->task_id, __entry->client_id,
__entry->cl_boot, __entry->cl_id,
__entry->seqno, __entry->reserved,
@@ -1573,7 +1667,7 @@ TRACE_EVENT(nfsd_cb_free_slot,
__entry->slot_seqno = session->se_cb_seq_nr;
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
- " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u\n",
+ " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u",
__entry->task_id, __entry->client_id,
__entry->cl_boot, __entry->cl_id,
__entry->seqno, __entry->reserved,
@@ -1978,7 +2072,7 @@ TRACE_EVENT(nfsd_ctl_time,
__entry->time = time;
__assign_str(name, name);
),
- TP_printk("file=%s time=%d\n",
+ TP_printk("file=%s time=%d",
__get_str(name), __entry->time
)
);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 2e41eb4c3cec..29b1f3613800 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1422,7 +1422,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
* Callers expect new file metadata to be committed even
* if the attributes have not changed.
*/
- if (iap->ia_valid)
+ if (nfsd_attrs_valid(attrs))
status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
else
status = nfserrno(commit_metadata(resfhp));
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index c60fdb6200fd..57cd70062048 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -60,6 +60,14 @@ static inline void nfsd_attrs_free(struct nfsd_attrs *attrs)
posix_acl_release(attrs->na_dpacl);
}
+/*
+ * Report whether @attrs carries anything to apply: either at least one
+ * bit set in the iattr's ia_valid, or a security label of non-zero length.
+ */
+static inline bool nfsd_attrs_valid(struct nfsd_attrs *attrs)
+{
+ if (attrs->na_iattr->ia_valid)
+ return true;
+
+ return attrs->na_seclabel && attrs->na_seclabel->len;
+}
+
__be32 nfserrno (int errno);
int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
struct svc_export **expp);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 415516c1b27e..fbdd42cde1fa 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -518,6 +518,24 @@ struct nfsd4_free_stateid {
stateid_t fr_stateid; /* request */
};
+/*
+ * Per-operation state carried in struct nfsd4_op as get_dir_delegation.
+ * NOTE(review): presumably the arguments and results of the NFSv4.1
+ * GET_DIR_DELEGATION operation, with field names following the protocol
+ * definition -- confirm against RFC 8881.
+ */
+struct nfsd4_get_dir_delegation {
+ /* request */
+ u32 gdda_signal_deleg_avail;
+ u32 gdda_notification_types[1]; /* one-word array, not a trailing flex array */
+ struct timespec64 gdda_child_attr_delay;
+ struct timespec64 gdda_dir_attr_delay;
+ u32 gdda_child_attributes[3]; /* three words, like ve_bmval[3] in nfsd4_verify */
+ u32 gdda_dir_attributes[3];
+ /* response */
+ u32 gddrnf_status;
+ nfs4_verifier gddr_cookieverf;
+ stateid_t gddr_stateid;
+ u32 gddr_notification[1];
+ u32 gddr_child_attributes[3];
+ u32 gddr_dir_attributes[3];
+ bool gddrnf_will_signal_deleg_avail;
+};
+
/* also used for NVERIFY */
struct nfsd4_verify {
u32 ve_bmval[3]; /* request */
@@ -674,8 +692,10 @@ struct nfsd4_copy {
#define NFSD4_COPY_F_INTRA (1)
#define NFSD4_COPY_F_SYNCHRONOUS (2)
#define NFSD4_COPY_F_COMMITTED (3)
+#define NFSD4_COPY_F_COMPLETED (4)
/* response */
+ __be32 nfserr;
struct nfsd42_write_res cp_res;
struct knfsd_fh fh;
@@ -735,7 +755,8 @@ struct nfsd4_offload_status {
/* response */
u64 count;
- u32 status;
+ __be32 status;
+ bool completed;
};
struct nfsd4_copy_notify {
@@ -797,6 +818,7 @@ struct nfsd4_op {
struct nfsd4_reclaim_complete reclaim_complete;
struct nfsd4_test_stateid test_stateid;
struct nfsd4_free_stateid free_stateid;
+ struct nfsd4_get_dir_delegation get_dir_delegation;
struct nfsd4_getdeviceinfo getdeviceinfo;
struct nfsd4_layoutget layoutget;
struct nfsd4_layoutcommit layoutcommit;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 960080753d3b..2b8fa3e782fb 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1784,6 +1784,9 @@ static int o2net_accept_one(struct socket *sock, int *more)
struct o2nm_node *node = NULL;
struct o2nm_node *local_node = NULL;
struct o2net_sock_container *sc = NULL;
+ struct proto_accept_arg arg = {
+ .flags = O_NONBLOCK,
+ };
struct o2net_node *nn;
unsigned int nofs_flag;
@@ -1802,7 +1805,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
new_sock->type = sock->type;
new_sock->ops = sock->ops;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
+ ret = sock->ops->accept(sock, new_sock, &arg);
if (ret < 0)
goto out;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 37cde0efee57..b1c2c0b82116 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -30,9 +30,7 @@ static const struct file_operations proc_sys_dir_file_operations;
static const struct inode_operations proc_sys_dir_operations;
/* Support for permanently empty directories */
-static struct ctl_table sysctl_mount_point[] = {
- {.type = SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY }
-};
+static struct ctl_table sysctl_mount_point[] = { };
/**
* register_sysctl_mount_point() - registers a sysctl mount point
@@ -48,14 +46,12 @@ struct ctl_table_header *register_sysctl_mount_point(const char *path)
}
EXPORT_SYMBOL(register_sysctl_mount_point);
-#define sysctl_is_perm_empty_ctl_table(tptr) \
- (tptr[0].type == SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY)
#define sysctl_is_perm_empty_ctl_header(hptr) \
- (sysctl_is_perm_empty_ctl_table(hptr->ctl_table))
+ (hptr->type == SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY)
#define sysctl_set_perm_empty_ctl_header(hptr) \
- (hptr->ctl_table[0].type = SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY)
+ (hptr->type = SYSCTL_TABLE_TYPE_PERMANENTLY_EMPTY)
#define sysctl_clear_perm_empty_ctl_header(hptr) \
- (hptr->ctl_table[0].type = SYSCTL_TABLE_TYPE_DEFAULT)
+ (hptr->type = SYSCTL_TABLE_TYPE_DEFAULT)
void proc_sys_poll_notify(struct ctl_table_poll *poll)
{
@@ -210,6 +206,8 @@ static void init_header(struct ctl_table_header *head,
node++;
}
}
+ if (table == sysctl_mount_point)
+ sysctl_set_perm_empty_ctl_header(head);
}
static void erase_header(struct ctl_table_header *head)
@@ -232,8 +230,7 @@ static int insert_header(struct ctl_dir *dir, struct ctl_table_header *header)
return -EROFS;
/* Am I creating a permanently empty directory? */
- if (header->ctl_table_size > 0 &&
- sysctl_is_perm_empty_ctl_table(header->ctl_table)) {
+ if (sysctl_is_perm_empty_ctl_header(header)) {
if (!RB_EMPTY_ROOT(&dir->root))
return -EINVAL;
sysctl_set_perm_empty_ctl_header(dir_h);
@@ -480,7 +477,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
}
if (root->set_ownership)
- root->set_ownership(head, table, &inode->i_uid, &inode->i_gid);
+ root->set_ownership(head, &inode->i_uid, &inode->i_gid);
else {
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
@@ -1204,7 +1201,7 @@ static bool get_links(struct ctl_dir *dir,
struct ctl_table *entry, *link;
if (header->ctl_table_size == 0 ||
- sysctl_is_perm_empty_ctl_table(header->ctl_table))
+ sysctl_is_perm_empty_ctl_header(header))
return true;
/* Are there links available for every entry in table? */
diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c
index a878cea70f4c..0256afdd4acf 100644
--- a/fs/tracefs/event_inode.c
+++ b/fs/tracefs/event_inode.c
@@ -345,10 +345,9 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry)
* If the ei is being freed, the ownership of the children
* doesn't matter.
*/
- if (ei->is_freed) {
- ei = NULL;
- break;
- }
+ if (ei->is_freed)
+ return NULL;
+
// Walk upwards until you find the events inode
} while (!ei->is_events);
diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile
index 0e51c0025a16..e309afe2b2bb 100644
--- a/fs/unicode/Makefile
+++ b/fs/unicode/Makefile
@@ -18,13 +18,13 @@ ifdef REGENERATE_UTF8DATA
quiet_cmd_utf8data = GEN $@
cmd_utf8data = $< \
- -a $(srctree)/$(src)/DerivedAge.txt \
- -c $(srctree)/$(src)/DerivedCombiningClass.txt \
- -p $(srctree)/$(src)/DerivedCoreProperties.txt \
- -d $(srctree)/$(src)/UnicodeData.txt \
- -f $(srctree)/$(src)/CaseFolding.txt \
- -n $(srctree)/$(src)/NormalizationCorrections.txt \
- -t $(srctree)/$(src)/NormalizationTest.txt \
+ -a $(src)/DerivedAge.txt \
+ -c $(src)/DerivedCombiningClass.txt \
+ -p $(src)/DerivedCoreProperties.txt \
+ -d $(src)/UnicodeData.txt \
+ -f $(src)/CaseFolding.txt \
+ -n $(src)/NormalizationCorrections.txt \
+ -t $(src)/NormalizationTest.txt \
-o $@
$(obj)/utf8data.c: $(obj)/mkutf8data $(filter %.txt, $(cmd_utf8data)) FORCE
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 76674ad5833e..c5a35e32adf0 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -4,8 +4,8 @@
# All Rights Reserved.
#
-ccflags-y += -I $(srctree)/$(src) # needed for trace events
-ccflags-y += -I $(srctree)/$(src)/libxfs
+ccflags-y += -I $(src) # needed for trace events
+ccflags-y += -I $(src)/libxfs
obj-$(CONFIG_XFS_FS) += xfs.o