summaryrefslogtreecommitdiff
path: root/fs/ocfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/acl.c5
-rw-r--r--fs/ocfs2/alloc.c186
-rw-r--r--fs/ocfs2/alloc.h8
-rw-r--r--fs/ocfs2/aops.c423
-rw-r--r--fs/ocfs2/aops.h19
-rw-r--r--fs/ocfs2/buffer_head_io.c8
-rw-r--r--fs/ocfs2/cluster/heartbeat.c135
-rw-r--r--fs/ocfs2/cluster/masklog.h2
-rw-r--r--fs/ocfs2/cluster/quorum.c8
-rw-r--r--fs/ocfs2/cluster/tcp.c26
-rw-r--r--fs/ocfs2/dcache.c21
-rw-r--r--fs/ocfs2/dir.c152
-rw-r--r--fs/ocfs2/dlm/dlmapi.h4
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c62
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c15
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c23
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c16
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c55
-rw-r--r--fs/ocfs2/dlmglue.c100
-rw-r--r--fs/ocfs2/dlmglue.h6
-rw-r--r--fs/ocfs2/export.c12
-rw-r--r--fs/ocfs2/extent_map.c28
-rw-r--r--fs/ocfs2/file.c98
-rw-r--r--fs/ocfs2/file.h1
-rw-r--r--fs/ocfs2/filecheck.c2
-rw-r--r--fs/ocfs2/inode.c207
-rw-r--r--fs/ocfs2/inode.h3
-rw-r--r--fs/ocfs2/ioctl.c25
-rw-r--r--fs/ocfs2/ioctl.h4
-rw-r--r--fs/ocfs2/journal.c321
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/localalloc.c30
-rw-r--r--fs/ocfs2/locks.c12
-rw-r--r--fs/ocfs2/mmap.c27
-rw-r--r--fs/ocfs2/mmap.h2
-rw-r--r--fs/ocfs2/move_extents.c60
-rw-r--r--fs/ocfs2/namei.c70
-rw-r--r--fs/ocfs2/ocfs2.h46
-rw-r--r--fs/ocfs2/ocfs2_fs.h35
-rw-r--r--fs/ocfs2/ocfs2_ioctl.h2
-rw-r--r--fs/ocfs2/ocfs2_lockid.h2
-rw-r--r--fs/ocfs2/ocfs2_trace.h84
-rw-r--r--fs/ocfs2/quota.h1
-rw-r--r--fs/ocfs2/quota_global.c36
-rw-r--r--fs/ocfs2/quota_local.c27
-rw-r--r--fs/ocfs2/refcounttree.c103
-rw-r--r--fs/ocfs2/reservations.c2
-rw-r--r--fs/ocfs2/reservations.h4
-rw-r--r--fs/ocfs2/resize.c10
-rw-r--r--fs/ocfs2/slot_map.c2
-rw-r--r--fs/ocfs2/stack_o2cb.c4
-rw-r--r--fs/ocfs2/stack_user.c35
-rw-r--r--fs/ocfs2/stackglue.c6
-rw-r--r--fs/ocfs2/stackglue.h4
-rw-r--r--fs/ocfs2/suballoc.c155
-rw-r--r--fs/ocfs2/suballoc.h7
-rw-r--r--fs/ocfs2/super.c638
-rw-r--r--fs/ocfs2/symlink.c16
-rw-r--r--fs/ocfs2/sysfile.c12
-rw-r--r--fs/ocfs2/xattr.c78
-rw-r--r--fs/ocfs2/xattr.h2
61 files changed, 1939 insertions, 1551 deletions
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index e75137a8e7cb..af1e2cedb217 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/fs_struct.h>
#include <cluster/masklog.h>
@@ -193,8 +194,8 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
inode->i_mode = new_mode;
inode_set_ctime_current(inode);
di->i_mode = cpu_to_le16(inode->i_mode);
- di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
+ di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index aef58f1395c8..b267ec580da9 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -566,7 +566,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle,
struct ocfs2_path *path,
struct ocfs2_extent_rec *insert_rec);
/*
- * Reset the actual path elements so that we can re-use the structure
+ * Reset the actual path elements so that we can reuse the structure
* to build another path. Generally, this involves freeing the buffer
* heads.
*/
@@ -967,7 +967,14 @@ int ocfs2_num_free_extents(struct ocfs2_extent_tree *et)
el = &eb->h_list;
}
- BUG_ON(el->l_tree_depth != 0);
+ if (el->l_tree_depth != 0) {
+ retval = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
+ "Owner %llu has leaf extent block %llu with an invalid l_tree_depth of %u\n",
+ (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
+ (unsigned long long)last_eb_blk,
+ le16_to_cpu(el->l_tree_depth));
+ goto bail;
+ }
retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
bail:
@@ -1175,7 +1182,7 @@ static int ocfs2_add_branch(handle_t *handle,
/*
* If there is a gap before the root end and the real end
- * of the righmost leaf block, we need to remove the gap
+ * of the rightmost leaf block, we need to remove the gap
* between new_cpos and root_end first so that the tree
* is consistent after we add a new branch(it will start
* from new_cpos).
@@ -1231,7 +1238,7 @@ static int ocfs2_add_branch(handle_t *handle,
/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
* linked with the rest of the tree.
- * conversly, new_eb_bhs[0] is the new bottommost leaf.
+ * conversely, new_eb_bhs[0] is the new bottommost leaf.
*
* when we leave the loop, new_last_eb_blk will point to the
* newest leaf, and next_blkno will point to the topmost extent
@@ -1796,6 +1803,14 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
el = root_el;
while (el->l_tree_depth) {
+ if (unlikely(le16_to_cpu(el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH)) {
+ ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+ "Owner %llu has invalid tree depth %u in extent list\n",
+ (unsigned long long)ocfs2_metadata_cache_owner(ci),
+ le16_to_cpu(el->l_tree_depth));
+ ret = -EROFS;
+ goto out;
+ }
if (le16_to_cpu(el->l_next_free_rec) == 0) {
ocfs2_error(ocfs2_metadata_cache_get_super(ci),
"Owner %llu has empty extent list at depth %u\n",
@@ -3705,7 +3720,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
* update split_index here.
*
* When the split_index is zero, we need to merge it to the
- * prevoius extent block. It is more efficient and easier
+ * previous extent block. It is more efficient and easier
* if we do merge_right first and merge_left later.
*/
ret = ocfs2_merge_rec_right(path, handle, et, split_rec,
@@ -4510,7 +4525,7 @@ static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
}
/*
- * This should only be called against the righmost leaf extent list.
+ * This should only be called against the rightmost leaf extent list.
*
* ocfs2_figure_appending_type() will figure out whether we'll have to
* insert at the tail of the rightmost leaf.
@@ -4760,7 +4775,7 @@ bail:
}
/*
- * Allcate and add clusters into the extent b-tree.
+ * Allocate and add clusters into the extent b-tree.
* The new clusters(clusters_to_add) will be inserted at logical_offset.
* The extent b-tree's root is specified by et, and
* it is not limited to the file storage. Any extent tree can use this
@@ -6147,6 +6162,9 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
int status;
struct inode *inode = NULL;
struct buffer_head *bh = NULL;
+ struct ocfs2_dinode *di;
+ struct ocfs2_truncate_log *tl;
+ unsigned int tl_count;
inode = ocfs2_get_system_file_inode(osb,
TRUNCATE_LOG_SYSTEM_INODE,
@@ -6164,6 +6182,18 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
goto bail;
}
+ di = (struct ocfs2_dinode *)bh->b_data;
+ tl = &di->id2.i_dealloc;
+ tl_count = le16_to_cpu(tl->tl_count);
+ if (unlikely(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
+ tl_count == 0)) {
+ status = -EFSCORRUPTED;
+ iput(inode);
+ brelse(bh);
+ mlog_errno(status);
+ goto bail;
+ }
+
*tl_inode = inode;
*tl_bh = bh;
bail:
@@ -6801,27 +6831,27 @@ static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
return 0;
}
-void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
- unsigned int from, unsigned int to,
- struct page *page, int zero, u64 *phys)
+void ocfs2_map_and_dirty_folio(struct inode *inode, handle_t *handle,
+ size_t from, size_t to, struct folio *folio, int zero,
+ u64 *phys)
{
int ret, partial = 0;
- loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
+ loff_t start_byte = folio_pos(folio) + from;
loff_t length = to - from;
- ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
+ ret = ocfs2_map_folio_blocks(folio, phys, inode, from, to, 0);
if (ret)
mlog_errno(ret);
if (zero)
- zero_user_segment(page, from, to);
+ folio_zero_segment(folio, from, to);
/*
* Need to set the buffers we zero'd into uptodate
* here if they aren't - ocfs2_map_page_blocks()
* might've skipped some
*/
- ret = walk_page_buffers(handle, page_buffers(page),
+ ret = walk_page_buffers(handle, folio_buffers(folio),
from, to, &partial,
ocfs2_zero_func);
if (ret < 0)
@@ -6834,92 +6864,88 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
}
if (!partial)
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
}
-static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
- loff_t end, struct page **pages,
- int numpages, u64 phys, handle_t *handle)
+static void ocfs2_zero_cluster_folios(struct inode *inode, loff_t start,
+ loff_t end, struct folio **folios, int numfolios,
+ u64 phys, handle_t *handle)
{
int i;
- struct page *page;
- unsigned int from, to = PAGE_SIZE;
struct super_block *sb = inode->i_sb;
BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
- if (numpages == 0)
+ if (numfolios == 0)
goto out;
- to = PAGE_SIZE;
- for(i = 0; i < numpages; i++) {
- page = pages[i];
+ for (i = 0; i < numfolios; i++) {
+ struct folio *folio = folios[i];
+ size_t to = folio_size(folio);
+ size_t from = offset_in_folio(folio, start);
- from = start & (PAGE_SIZE - 1);
- if ((end >> PAGE_SHIFT) == page->index)
- to = end & (PAGE_SIZE - 1);
+ if (to > end - folio_pos(folio))
+ to = end - folio_pos(folio);
- BUG_ON(from > PAGE_SIZE);
- BUG_ON(to > PAGE_SIZE);
+ ocfs2_map_and_dirty_folio(inode, handle, from, to, folio, 1,
+ &phys);
- ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1,
- &phys);
-
- start = (page->index + 1) << PAGE_SHIFT;
+ start = folio_next_pos(folio);
}
out:
- if (pages)
- ocfs2_unlock_and_free_pages(pages, numpages);
+ if (folios)
+ ocfs2_unlock_and_free_folios(folios, numfolios);
}
-int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
- struct page **pages, int *num)
+static int ocfs2_grab_folios(struct inode *inode, loff_t start, loff_t end,
+ struct folio **folios, int *num)
{
- int numpages, ret = 0;
+ int numfolios, ret = 0;
struct address_space *mapping = inode->i_mapping;
unsigned long index;
loff_t last_page_bytes;
BUG_ON(start > end);
- numpages = 0;
+ numfolios = 0;
last_page_bytes = PAGE_ALIGN(end);
index = start >> PAGE_SHIFT;
do {
- pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
- if (!pages[numpages]) {
- ret = -ENOMEM;
+ folios[numfolios] = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folios[numfolios])) {
+ ret = PTR_ERR(folios[numfolios]);
mlog_errno(ret);
+ folios[numfolios] = NULL;
goto out;
}
- numpages++;
- index++;
+ index = folio_next_index(folios[numfolios]);
+ numfolios++;
} while (index < (last_page_bytes >> PAGE_SHIFT));
out:
if (ret != 0) {
- if (pages)
- ocfs2_unlock_and_free_pages(pages, numpages);
- numpages = 0;
+ ocfs2_unlock_and_free_folios(folios, numfolios);
+ numfolios = 0;
}
- *num = numpages;
+ *num = numfolios;
return ret;
}
-static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
- struct page **pages, int *num)
+static int ocfs2_grab_eof_folios(struct inode *inode, loff_t start, loff_t end,
+ struct folio **folios, int *num)
{
struct super_block *sb = inode->i_sb;
BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
(end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
- return ocfs2_grab_pages(inode, start, end, pages, num);
+ return ocfs2_grab_folios(inode, start, end, folios, num);
}
/*
@@ -6927,14 +6953,14 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
* nonzero data on subsequent file extends.
*
* We need to call this before i_size is updated on the inode because
- * otherwise block_write_full_page() will skip writeout of pages past
+ * otherwise block_write_full_folio() will skip writeout of pages past
* i_size.
*/
int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
u64 range_start, u64 range_end)
{
- int ret = 0, numpages;
- struct page **pages = NULL;
+ int ret = 0, numfolios;
+ struct folio **folios = NULL;
u64 phys;
unsigned int ext_flags;
struct super_block *sb = inode->i_sb;
@@ -6947,17 +6973,17 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
return 0;
/*
- * Avoid zeroing pages fully beyond current i_size. It is pointless as
- * underlying blocks of those pages should be already zeroed out and
+ * Avoid zeroing folios fully beyond current i_size. It is pointless as
+ * underlying blocks of those folios should be already zeroed out and
* page writeback will skip them anyway.
*/
range_end = min_t(u64, range_end, i_size_read(inode));
if (range_start >= range_end)
return 0;
- pages = kcalloc(ocfs2_pages_per_cluster(sb),
- sizeof(struct page *), GFP_NOFS);
- if (pages == NULL) {
+ folios = kcalloc(ocfs2_pages_per_cluster(sb),
+ sizeof(struct folio *), GFP_NOFS);
+ if (folios == NULL) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
@@ -6978,18 +7004,18 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
if (phys == 0 || ext_flags & OCFS2_EXT_UNWRITTEN)
goto out;
- ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages,
- &numpages);
+ ret = ocfs2_grab_eof_folios(inode, range_start, range_end, folios,
+ &numfolios);
if (ret) {
mlog_errno(ret);
goto out;
}
- ocfs2_zero_cluster_pages(inode, range_start, range_end, pages,
- numpages, phys, handle);
+ ocfs2_zero_cluster_folios(inode, range_start, range_end, folios,
+ numfolios, phys, handle);
/*
- * Initiate writeout of the pages we zero'd here. We don't
+ * Initiate writeout of the folios we zero'd here. We don't
* wait on them - the truncate_inode_pages() call later will
* do that for us.
*/
@@ -6999,7 +7025,7 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
mlog_errno(ret);
out:
- kfree(pages);
+ kfree(folios);
return ret;
}
@@ -7052,7 +7078,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh)
{
- int ret, has_data, num_pages = 0;
+ int ret, has_data, num_folios = 0;
int need_free = 0;
u32 bit_off, num;
handle_t *handle;
@@ -7061,7 +7087,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_alloc_context *data_ac = NULL;
- struct page *page = NULL;
+ struct folio *folio = NULL;
struct ocfs2_extent_tree et;
int did_quota = 0;
@@ -7112,12 +7138,12 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
/*
* Save two copies, one for insert, and one that can
- * be changed by ocfs2_map_and_dirty_page() below.
+ * be changed by ocfs2_map_and_dirty_folio() below.
*/
block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
- ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page,
- &num_pages);
+ ret = ocfs2_grab_eof_folios(inode, 0, page_end, &folio,
+ &num_folios);
if (ret) {
mlog_errno(ret);
need_free = 1;
@@ -7128,15 +7154,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
* This should populate the 1st page for us and mark
* it up to date.
*/
- ret = ocfs2_read_inline_data(inode, page, di_bh);
+ ret = ocfs2_read_inline_data(inode, folio, di_bh);
if (ret) {
mlog_errno(ret);
need_free = 1;
goto out_unlock;
}
- ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0,
- &phys);
+ ocfs2_map_and_dirty_folio(inode, handle, 0, page_end, folio, 0,
+ &phys);
}
spin_lock(&oi->ip_lock);
@@ -7167,8 +7193,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
out_unlock:
- if (page)
- ocfs2_unlock_and_free_pages(&page, num_pages);
+ if (folio)
+ ocfs2_unlock_and_free_folios(&folio, num_folios);
out_commit:
if (ret < 0 && did_quota)
@@ -7436,10 +7462,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
}
inode->i_blocks = ocfs2_inode_sector_count(inode);
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
- di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
- di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
+ di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime_sec(inode));
+ di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, di_bh);
@@ -7642,7 +7668,7 @@ out_mutex:
goto next_group;
}
out:
- range->len = trimmed * sb->s_blocksize;
+ range->len = trimmed * osb->s_clustersize;
return ret;
}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 4af7abaa6e40..1c0c83362904 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -254,11 +254,9 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
return !rec->e_leaf_clusters;
}
-int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
- struct page **pages, int *num);
-void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
- unsigned int from, unsigned int to,
- struct page *page, int zero, u64 *phys);
+void ocfs2_map_and_dirty_folio(struct inode *inode, handle_t *handle,
+ size_t from, size_t to, struct folio *folio, int zero,
+ u64 *phys);
/*
* Structures which describe a path through a btree, and functions to
* manipulate them.
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0fdba30740ab..76c86f1c2b1c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -46,7 +46,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh = NULL;
struct buffer_head *buffer_cache_bh = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- void *kaddr;
trace_ocfs2_symlink_get_block(
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -91,17 +90,11 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
* could've happened. Since we've got a reference on
* the bh, even if it commits while we're doing the
* copy, the data is still good. */
- if (buffer_jbd(buffer_cache_bh)
- && ocfs2_inode_is_new(inode)) {
- kaddr = kmap_atomic(bh_result->b_page);
- if (!kaddr) {
- mlog(ML_ERROR, "couldn't kmap!\n");
- goto bail;
- }
- memcpy(kaddr + (bh_result->b_size * iblock),
- buffer_cache_bh->b_data,
- bh_result->b_size);
- kunmap_atomic(kaddr);
+ if (buffer_jbd(buffer_cache_bh) && ocfs2_inode_is_new(inode)) {
+ memcpy_to_folio(bh_result->b_folio,
+ bh_result->b_size * iblock,
+ buffer_cache_bh->b_data,
+ bh_result->b_size);
set_buffer_uptodate(bh_result);
}
brelse(buffer_cache_bh);
@@ -156,9 +149,8 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count,
&ext_flags);
if (err) {
- mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
- "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
- (unsigned long long)p_blkno);
+ mlog(ML_ERROR, "get_blocks() failed, inode: 0x%p, "
+ "block: %llu\n", inode, (unsigned long long)iblock);
goto bail;
}
@@ -216,10 +208,9 @@ bail:
return err;
}
-int ocfs2_read_inline_data(struct inode *inode, struct page *page,
+int ocfs2_read_inline_data(struct inode *inode, struct folio *folio,
struct buffer_head *di_bh)
{
- void *kaddr;
loff_t size;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -231,7 +222,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
size = i_size_read(inode);
- if (size > PAGE_SIZE ||
+ if (size > folio_size(folio) ||
size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
ocfs2_error(inode->i_sb,
"Inode %llu has with inline data has bad size: %Lu\n",
@@ -240,25 +231,18 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
return -EROFS;
}
- kaddr = kmap_atomic(page);
- if (size)
- memcpy(kaddr, di->id2.i_data.id_data, size);
- /* Clear the remaining part of the page */
- memset(kaddr + size, 0, PAGE_SIZE - size);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
-
- SetPageUptodate(page);
+ folio_fill_tail(folio, 0, di->id2.i_data.id_data, size);
+ folio_mark_uptodate(folio);
return 0;
}
-static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
+static int ocfs2_readpage_inline(struct inode *inode, struct folio *folio)
{
int ret;
struct buffer_head *di_bh = NULL;
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
ret = ocfs2_read_inode_block(inode, &di_bh);
@@ -267,9 +251,9 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
goto out;
}
- ret = ocfs2_read_inline_data(inode, page, di_bh);
+ ret = ocfs2_read_inline_data(inode, folio, di_bh);
out:
- unlock_page(page);
+ folio_unlock(folio);
brelse(di_bh);
return ret;
@@ -284,7 +268,7 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio)
trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, folio->index);
- ret = ocfs2_inode_lock_with_page(inode, NULL, 0, &folio->page);
+ ret = ocfs2_inode_lock_with_folio(inode, NULL, 0, folio);
if (ret != 0) {
if (ret == AOP_TRUNCATED_PAGE)
unlock = 0;
@@ -306,7 +290,7 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio)
}
/*
- * i_size might have just been updated as we grabed the meta lock. We
+ * i_size might have just been updated as we grabbed the meta lock. We
* might now be discovering a truncate that hit on another node.
* block_read_full_folio->get_block freaks out if it is asked to read
* beyond the end of a file, so we check here. Callers
@@ -323,7 +307,7 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio)
}
if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
- ret = ocfs2_readpage_inline(inode, &folio->page);
+ ret = ocfs2_readpage_inline(inode, folio);
else
ret = block_read_full_folio(folio, ocfs2_get_block);
unlock = 0;
@@ -389,21 +373,18 @@ out_unlock:
/* Note: Because we don't support holes, our allocation has
* already happened (allocation writes zeros to the file data)
* so we don't have to worry about ordered writes in
- * ocfs2_writepage.
+ * ocfs2_writepages.
*
- * ->writepage is called during the process of invalidating the page cache
+ * ->writepages is called during the process of invalidating the page cache
* during blocked lock processing. It can't block on any cluster locks
* to during block mapping. It's relying on the fact that the block
* mapping can't have disappeared under the dirty pages that it is
* being asked to write back.
*/
-static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
+static int ocfs2_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
- trace_ocfs2_writepage(
- (unsigned long long)OCFS2_I(page->mapping->host)->ip_blkno,
- page->index);
-
- return block_write_full_page(page, ocfs2_get_block, wbc);
+ return mpage_writepages(mapping, wbc, ocfs2_get_block);
}
/* Taken from ext3. We don't necessarily need the full blown
@@ -538,7 +519,7 @@ static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
*
* from == to == 0 is code for "zero the entire cluster region"
*/
-static void ocfs2_clear_page_regions(struct page *page,
+static void ocfs2_clear_folio_regions(struct folio *folio,
struct ocfs2_super *osb, u32 cpos,
unsigned from, unsigned to)
{
@@ -547,7 +528,7 @@ static void ocfs2_clear_page_regions(struct page *page,
ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end);
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_folio(folio, 0);
if (from || to) {
if (from > cluster_start)
@@ -558,20 +539,20 @@ static void ocfs2_clear_page_regions(struct page *page,
memset(kaddr + cluster_start, 0, cluster_end - cluster_start);
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
}
/*
* Nonsparse file systems fully allocate before we get to the write
* code. This prevents ocfs2_write() from tagging the write as an
- * allocating one, which means ocfs2_map_page_blocks() might try to
+ * allocating one, which means ocfs2_map_folio_blocks() might try to
* read-in the blocks at the tail of our file. Avoid reading them by
* testing i_size against each block offset.
*/
-static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
+static int ocfs2_should_read_blk(struct inode *inode, struct folio *folio,
unsigned int block_start)
{
- u64 offset = page_offset(page) + block_start;
+ u64 offset = folio_pos(folio) + block_start;
if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
return 1;
@@ -589,7 +570,7 @@ static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
*
* This will also skip zeroing, which is handled externally.
*/
-int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
+int ocfs2_map_folio_blocks(struct folio *folio, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new)
{
@@ -598,10 +579,10 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
unsigned int block_end, block_start;
unsigned int bsize = i_blocksize(inode);
- if (!page_has_buffers(page))
- create_empty_buffers(page, bsize, 0);
+ head = folio_buffers(folio);
+ if (!head)
+ head = create_empty_buffers(folio, bsize, 0);
- head = page_buffers(page);
for (bh = head, block_start = 0; bh != head || !block_start;
bh = bh->b_this_page, block_start += bsize) {
block_end = block_start + bsize;
@@ -613,7 +594,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
* they may belong to unallocated clusters.
*/
if (block_start >= to || block_end <= from) {
- if (PageUptodate(page))
+ if (folio_test_uptodate(folio))
set_buffer_uptodate(bh);
continue;
}
@@ -630,11 +611,11 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
clean_bdev_bh_alias(bh);
}
- if (PageUptodate(page)) {
+ if (folio_test_uptodate(folio)) {
set_buffer_uptodate(bh);
} else if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
!buffer_new(bh) &&
- ocfs2_should_read_blk(inode, page, block_start) &&
+ ocfs2_should_read_blk(inode, folio, block_start) &&
(block_start < from || block_end > to)) {
bh_read_nowait(bh, 0);
*wait_bh++=bh;
@@ -668,7 +649,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
if (block_start >= to)
break;
- zero_user(page, block_start, bh->b_size);
+ folio_zero_range(folio, block_start, bh->b_size);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
@@ -732,24 +713,24 @@ struct ocfs2_write_ctxt {
unsigned int w_large_pages;
/*
- * Pages involved in this write.
+ * Folios involved in this write.
*
- * w_target_page is the page being written to by the user.
+ * w_target_folio is the folio being written to by the user.
*
- * w_pages is an array of pages which always contains
- * w_target_page, and in the case of an allocating write with
+ * w_folios is an array of folios which always contains
+ * w_target_folio, and in the case of an allocating write with
* page_size < cluster size, it will contain zero'd and mapped
- * pages adjacent to w_target_page which need to be written
+ * pages adjacent to w_target_folio which need to be written
* out in so that future reads from that region will get
* zero's.
*/
- unsigned int w_num_pages;
- struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
- struct page *w_target_page;
+ unsigned int w_num_folios;
+ struct folio *w_folios[OCFS2_MAX_CTXT_PAGES];
+ struct folio *w_target_folio;
/*
* w_target_locked is used for page_mkwrite path indicating no unlocking
- * against w_target_page in ocfs2_write_end_nolock.
+ * against w_target_folio in ocfs2_write_end_nolock.
*/
unsigned int w_target_locked:1;
@@ -774,40 +755,40 @@ struct ocfs2_write_ctxt {
unsigned int w_unwritten_count;
};
-void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
+void ocfs2_unlock_and_free_folios(struct folio **folios, int num_folios)
{
int i;
- for(i = 0; i < num_pages; i++) {
- if (pages[i]) {
- unlock_page(pages[i]);
- mark_page_accessed(pages[i]);
- put_page(pages[i]);
- }
+ for(i = 0; i < num_folios; i++) {
+ if (!folios[i])
+ continue;
+ folio_unlock(folios[i]);
+ folio_mark_accessed(folios[i]);
+ folio_put(folios[i]);
}
}
-static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_folios(struct ocfs2_write_ctxt *wc)
{
int i;
/*
* w_target_locked is only set to true in the page_mkwrite() case.
* The intent is to allow us to lock the target page from write_begin()
- * to write_end(). The caller must hold a ref on w_target_page.
+ * to write_end(). The caller must hold a ref on w_target_folio.
*/
if (wc->w_target_locked) {
- BUG_ON(!wc->w_target_page);
- for (i = 0; i < wc->w_num_pages; i++) {
- if (wc->w_target_page == wc->w_pages[i]) {
- wc->w_pages[i] = NULL;
+ BUG_ON(!wc->w_target_folio);
+ for (i = 0; i < wc->w_num_folios; i++) {
+ if (wc->w_target_folio == wc->w_folios[i]) {
+ wc->w_folios[i] = NULL;
break;
}
}
- mark_page_accessed(wc->w_target_page);
- put_page(wc->w_target_page);
+ folio_mark_accessed(wc->w_target_folio);
+ folio_put(wc->w_target_folio);
}
- ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+ ocfs2_unlock_and_free_folios(wc->w_folios, wc->w_num_folios);
}
static void ocfs2_free_unwritten_list(struct inode *inode,
@@ -829,7 +810,7 @@ static void ocfs2_free_write_ctxt(struct inode *inode,
struct ocfs2_write_ctxt *wc)
{
ocfs2_free_unwritten_list(inode, &wc->w_unwritten_list);
- ocfs2_unlock_pages(wc);
+ ocfs2_unlock_folios(wc);
brelse(wc->w_di_bh);
kfree(wc);
}
@@ -872,29 +853,30 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
* and dirty so they'll be written out (in order to prevent uninitialised
* block data from leaking). And clear the new bit.
*/
-static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+static void ocfs2_zero_new_buffers(struct folio *folio, size_t from, size_t to)
{
unsigned int block_start, block_end;
struct buffer_head *head, *bh;
- BUG_ON(!PageLocked(page));
- if (!page_has_buffers(page))
+ BUG_ON(!folio_test_locked(folio));
+ head = folio_buffers(folio);
+ if (!head)
return;
- bh = head = page_buffers(page);
+ bh = head;
block_start = 0;
do {
block_end = block_start + bh->b_size;
if (buffer_new(bh)) {
if (block_end > from && block_start < to) {
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
unsigned start, end;
start = max(from, block_start);
end = min(to, block_end);
- zero_user_segment(page, start, end);
+ folio_zero_segment(folio, start, end);
set_buffer_uptodate(bh);
}
@@ -919,29 +901,26 @@ static void ocfs2_write_failure(struct inode *inode,
int i;
unsigned from = user_pos & (PAGE_SIZE - 1),
to = user_pos + user_len;
- struct page *tmppage;
- if (wc->w_target_page)
- ocfs2_zero_new_buffers(wc->w_target_page, from, to);
+ if (wc->w_target_folio)
+ ocfs2_zero_new_buffers(wc->w_target_folio, from, to);
- for(i = 0; i < wc->w_num_pages; i++) {
- tmppage = wc->w_pages[i];
+ for (i = 0; i < wc->w_num_folios; i++) {
+ struct folio *folio = wc->w_folios[i];
- if (tmppage && page_has_buffers(tmppage)) {
+ if (folio && folio_buffers(folio)) {
if (ocfs2_should_order_data(inode))
ocfs2_jbd2_inode_add_write(wc->w_handle, inode,
user_pos, user_len);
- block_commit_write(tmppage, from, to);
+ block_commit_write(folio, from, to);
}
}
}
-static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
- struct ocfs2_write_ctxt *wc,
- struct page *page, u32 cpos,
- loff_t user_pos, unsigned user_len,
- int new)
+static int ocfs2_prepare_folio_for_write(struct inode *inode, u64 *p_blkno,
+ struct ocfs2_write_ctxt *wc, struct folio *folio, u32 cpos,
+ loff_t user_pos, unsigned user_len, int new)
{
int ret;
unsigned int map_from = 0, map_to = 0;
@@ -954,20 +933,19 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
/* treat the write as new if the a hole/lseek spanned across
* the page boundary.
*/
- new = new | ((i_size_read(inode) <= page_offset(page)) &&
- (page_offset(page) <= user_pos));
+ new = new | ((i_size_read(inode) <= folio_pos(folio)) &&
+ (folio_pos(folio) <= user_pos));
- if (page == wc->w_target_page) {
+ if (folio == wc->w_target_folio) {
map_from = user_pos & (PAGE_SIZE - 1);
map_to = map_from + user_len;
if (new)
- ret = ocfs2_map_page_blocks(page, p_blkno, inode,
- cluster_start, cluster_end,
- new);
+ ret = ocfs2_map_folio_blocks(folio, p_blkno, inode,
+ cluster_start, cluster_end, new);
else
- ret = ocfs2_map_page_blocks(page, p_blkno, inode,
- map_from, map_to, new);
+ ret = ocfs2_map_folio_blocks(folio, p_blkno, inode,
+ map_from, map_to, new);
if (ret) {
mlog_errno(ret);
goto out;
@@ -981,7 +959,7 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
}
} else {
/*
- * If we haven't allocated the new page yet, we
+ * If we haven't allocated the new folio yet, we
* shouldn't be writing it out without copying user
* data. This is likely a math error from the caller.
*/
@@ -990,8 +968,8 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
map_from = cluster_start;
map_to = cluster_end;
- ret = ocfs2_map_page_blocks(page, p_blkno, inode,
- cluster_start, cluster_end, new);
+ ret = ocfs2_map_folio_blocks(folio, p_blkno, inode,
+ cluster_start, cluster_end, new);
if (ret) {
mlog_errno(ret);
goto out;
@@ -999,20 +977,20 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
}
/*
- * Parts of newly allocated pages need to be zero'd.
+ * Parts of newly allocated folios need to be zero'd.
*
* Above, we have also rewritten 'to' and 'from' - as far as
* the rest of the function is concerned, the entire cluster
- * range inside of a page needs to be written.
+ * range inside of a folio needs to be written.
*
- * We can skip this if the page is up to date - it's already
+ * We can skip this if the folio is uptodate - it's already
* been zero'd from being read in as a hole.
*/
- if (new && !PageUptodate(page))
- ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb),
+ if (new && !folio_test_uptodate(folio))
+ ocfs2_clear_folio_regions(folio, OCFS2_SB(inode->i_sb),
cpos, user_data_from, user_data_to);
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
out:
return ret;
@@ -1021,11 +999,9 @@ out:
/*
* This function will only grab one clusters worth of pages.
*/
-static int ocfs2_grab_pages_for_write(struct address_space *mapping,
- struct ocfs2_write_ctxt *wc,
- u32 cpos, loff_t user_pos,
- unsigned user_len, int new,
- struct page *mmap_page)
+static int ocfs2_grab_folios_for_write(struct address_space *mapping,
+ struct ocfs2_write_ctxt *wc, u32 cpos, loff_t user_pos,
+ unsigned user_len, int new, struct folio *mmap_folio)
{
int ret = 0, i;
unsigned long start, target_index, end_index, index;
@@ -1042,7 +1018,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
* last page of the write.
*/
if (new) {
- wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
+ wc->w_num_folios = ocfs2_pages_per_cluster(inode->i_sb);
start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
/*
* We need the index *past* the last page we could possibly
@@ -1052,15 +1028,15 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
last_byte = max(user_pos + user_len, i_size_read(inode));
BUG_ON(last_byte < 1);
end_index = ((last_byte - 1) >> PAGE_SHIFT) + 1;
- if ((start + wc->w_num_pages) > end_index)
- wc->w_num_pages = end_index - start;
+ if ((start + wc->w_num_folios) > end_index)
+ wc->w_num_folios = end_index - start;
} else {
- wc->w_num_pages = 1;
+ wc->w_num_folios = 1;
start = target_index;
}
end_index = (user_pos + user_len - 1) >> PAGE_SHIFT;
- for(i = 0; i < wc->w_num_pages; i++) {
+ for(i = 0; i < wc->w_num_folios; i++) {
index = start + i;
if (index >= target_index && index <= end_index &&
@@ -1070,37 +1046,39 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
* and wants us to directly use the page
* passed in.
*/
- lock_page(mmap_page);
+ folio_lock(mmap_folio);
/* Exit and let the caller retry */
- if (mmap_page->mapping != mapping) {
- WARN_ON(mmap_page->mapping);
- unlock_page(mmap_page);
+ if (mmap_folio->mapping != mapping) {
+ WARN_ON(mmap_folio->mapping);
+ folio_unlock(mmap_folio);
ret = -EAGAIN;
goto out;
}
- get_page(mmap_page);
- wc->w_pages[i] = mmap_page;
+ folio_get(mmap_folio);
+ wc->w_folios[i] = mmap_folio;
wc->w_target_locked = true;
} else if (index >= target_index && index <= end_index &&
wc->w_type == OCFS2_WRITE_DIRECT) {
/* Direct write has no mapping page. */
- wc->w_pages[i] = NULL;
+ wc->w_folios[i] = NULL;
continue;
} else {
- wc->w_pages[i] = find_or_create_page(mapping, index,
- GFP_NOFS);
- if (!wc->w_pages[i]) {
- ret = -ENOMEM;
+ wc->w_folios[i] = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ GFP_NOFS);
+ if (IS_ERR(wc->w_folios[i])) {
+ ret = PTR_ERR(wc->w_folios[i]);
mlog_errno(ret);
+ wc->w_folios[i] = NULL;
goto out;
}
}
- wait_for_stable_page(wc->w_pages[i]);
+ folio_wait_stable(wc->w_folios[i]);
if (index == target_index)
- wc->w_target_page = wc->w_pages[i];
+ wc->w_target_folio = wc->w_folios[i];
}
out:
if (ret)
@@ -1184,19 +1162,18 @@ static int ocfs2_write_cluster(struct address_space *mapping,
if (!should_zero)
p_blkno += (user_pos >> inode->i_sb->s_blocksize_bits) & (u64)(bpc - 1);
- for(i = 0; i < wc->w_num_pages; i++) {
+ for (i = 0; i < wc->w_num_folios; i++) {
int tmpret;
/* This is the direct io target page. */
- if (wc->w_pages[i] == NULL) {
- p_blkno++;
+ if (wc->w_folios[i] == NULL) {
+ p_blkno += (1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits));
continue;
}
- tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
- wc->w_pages[i], cpos,
- user_pos, user_len,
- should_zero);
+ tmpret = ocfs2_prepare_folio_for_write(inode, &p_blkno, wc,
+ wc->w_folios[i], cpos, user_pos, user_len,
+ should_zero);
if (tmpret) {
mlog_errno(tmpret);
if (ret == 0)
@@ -1475,7 +1452,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
{
int ret;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct page *page;
+ struct folio *folio;
handle_t *handle;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
@@ -1486,19 +1463,21 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
goto out;
}
- page = find_or_create_page(mapping, 0, GFP_NOFS);
- if (!page) {
+ folio = __filemap_get_folio(mapping, 0,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folio)) {
ocfs2_commit_trans(osb, handle);
- ret = -ENOMEM;
+ ret = PTR_ERR(folio);
mlog_errno(ret);
goto out;
}
/*
- * If we don't set w_num_pages then this page won't get unlocked
+ * If we don't set w_num_folios then this folio won't get unlocked
* and freed on cleanup of the write context.
*/
- wc->w_pages[0] = wc->w_target_page = page;
- wc->w_num_pages = 1;
+ wc->w_target_folio = folio;
+ wc->w_folios[0] = folio;
+ wc->w_num_folios = 1;
ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1512,8 +1491,8 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
ocfs2_set_inode_data_inline(inode, di);
- if (!PageUptodate(page)) {
- ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh);
+ if (!folio_test_uptodate(folio)) {
+ ret = ocfs2_read_inline_data(inode, folio, wc->w_di_bh);
if (ret) {
ocfs2_commit_trans(osb, handle);
@@ -1536,9 +1515,8 @@ int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size)
}
static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
- struct inode *inode, loff_t pos,
- unsigned len, struct page *mmap_page,
- struct ocfs2_write_ctxt *wc)
+ struct inode *inode, loff_t pos, size_t len,
+ struct folio *mmap_folio, struct ocfs2_write_ctxt *wc)
{
int ret, written = 0;
loff_t end = pos + len;
@@ -1553,7 +1531,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
* Handle inodes which already have inline data 1st.
*/
if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
- if (mmap_page == NULL &&
+ if (mmap_folio == NULL &&
ocfs2_size_fits_inline_data(wc->w_di_bh, end))
goto do_inline_write;
@@ -1577,7 +1555,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
* Check whether the write can fit.
*/
di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
- if (mmap_page ||
+ if (mmap_folio ||
end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di))
return 0;
@@ -1644,9 +1622,9 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
}
int ocfs2_write_begin_nolock(struct address_space *mapping,
- loff_t pos, unsigned len, ocfs2_write_type_t type,
- struct page **pagep, void **fsdata,
- struct buffer_head *di_bh, struct page *mmap_page)
+ loff_t pos, unsigned len, ocfs2_write_type_t type,
+ struct folio **foliop, void **fsdata,
+ struct buffer_head *di_bh, struct folio *mmap_folio)
{
int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
@@ -1669,7 +1647,7 @@ try_again:
if (ocfs2_supports_inline_data(osb)) {
ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len,
- mmap_page, wc);
+ mmap_folio, wc);
if (ret == 1) {
ret = 0;
goto success;
@@ -1721,7 +1699,7 @@ try_again:
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(long long)i_size_read(inode),
le32_to_cpu(di->i_clusters),
- pos, len, type, mmap_page,
+ pos, len, type, mmap_folio,
clusters_to_alloc, extents_to_split);
/*
@@ -1792,21 +1770,21 @@ try_again:
}
/*
- * Fill our page array first. That way we've grabbed enough so
+ * Fill our folio array first. That way we've grabbed enough so
* that we can zero and flush if we error after adding the
* extent.
*/
- ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
- cluster_of_pages, mmap_page);
+ ret = ocfs2_grab_folios_for_write(mapping, wc, wc->w_cpos, pos, len,
+ cluster_of_pages, mmap_folio);
if (ret) {
/*
- * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
- * the target page. In this case, we exit with no error and no target
- * page. This will trigger the caller, page_mkwrite(), to re-try
- * the operation.
+ * ocfs2_grab_folios_for_write() returns -EAGAIN if it
+ * could not lock the target folio. In this case, we exit
+ * with no error and no target folio. This will trigger
+ * the caller, page_mkwrite(), to re-try the operation.
*/
if (type == OCFS2_WRITE_MMAP && ret == -EAGAIN) {
- BUG_ON(wc->w_target_page);
+ BUG_ON(wc->w_target_folio);
ret = 0;
goto out_quota;
}
@@ -1828,8 +1806,8 @@ try_again:
ocfs2_free_alloc_context(meta_ac);
success:
- if (pagep)
- *pagep = wc->w_target_page;
+ if (foliop)
+ *foliop = wc->w_target_folio;
*fsdata = wc;
return 0;
out_quota:
@@ -1848,7 +1826,7 @@ out:
* to VM code.
*/
if (wc->w_target_locked)
- unlock_page(mmap_page);
+ folio_unlock(mmap_folio);
ocfs2_free_write_ctxt(inode, wc);
@@ -1879,9 +1857,10 @@ out:
return ret;
}
-static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
+static int ocfs2_write_begin(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
struct buffer_head *di_bh = NULL;
@@ -1903,7 +1882,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
down_write(&OCFS2_I(inode)->ip_alloc_sem);
ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_BUFFER,
- pagep, fsdata, di_bh, NULL);
+ foliop, fsdata, di_bh, NULL);
if (ret) {
mlog_errno(ret);
goto out_fail;
@@ -1927,18 +1906,15 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
struct ocfs2_dinode *di,
struct ocfs2_write_ctxt *wc)
{
- void *kaddr;
-
if (unlikely(*copied < len)) {
- if (!PageUptodate(wc->w_target_page)) {
+ if (!folio_test_uptodate(wc->w_target_folio)) {
*copied = 0;
return;
}
}
- kaddr = kmap_atomic(wc->w_target_page);
- memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
- kunmap_atomic(kaddr);
+ memcpy_from_folio(di->id2.i_data.id_data + pos, wc->w_target_folio,
+ pos, *copied);
trace_ocfs2_write_end_inline(
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1947,17 +1923,16 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
le16_to_cpu(di->i_dyn_features));
}
-int ocfs2_write_end_nolock(struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied, void *fsdata)
+int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos,
+ unsigned len, unsigned copied, void *fsdata)
{
int i, ret;
- unsigned from, to, start = pos & (PAGE_SIZE - 1);
+ size_t from, to, start = pos & (PAGE_SIZE - 1);
struct inode *inode = mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_write_ctxt *wc = fsdata;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
handle_t *handle = wc->w_handle;
- struct page *tmppage;
BUG_ON(!list_empty(&wc->w_unwritten_list));
@@ -1976,44 +1951,44 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
goto out_write_size;
}
- if (unlikely(copied < len) && wc->w_target_page) {
+ if (unlikely(copied < len) && wc->w_target_folio) {
loff_t new_isize;
- if (!PageUptodate(wc->w_target_page))
+ if (!folio_test_uptodate(wc->w_target_folio))
copied = 0;
new_isize = max_t(loff_t, i_size_read(inode), pos + copied);
- if (new_isize > page_offset(wc->w_target_page))
- ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
+ if (new_isize > folio_pos(wc->w_target_folio))
+ ocfs2_zero_new_buffers(wc->w_target_folio, start+copied,
start+len);
else {
/*
- * When page is fully beyond new isize (data copy
- * failed), do not bother zeroing the page. Invalidate
+ * When folio is fully beyond new isize (data copy
+ * failed), do not bother zeroing the folio. Invalidate
* it instead so that writeback does not get confused
* put page & buffer dirty bits into inconsistent
* state.
*/
- block_invalidate_folio(page_folio(wc->w_target_page),
- 0, PAGE_SIZE);
+ block_invalidate_folio(wc->w_target_folio, 0,
+ folio_size(wc->w_target_folio));
}
}
- if (wc->w_target_page)
- flush_dcache_page(wc->w_target_page);
+ if (wc->w_target_folio)
+ flush_dcache_folio(wc->w_target_folio);
- for(i = 0; i < wc->w_num_pages; i++) {
- tmppage = wc->w_pages[i];
+ for (i = 0; i < wc->w_num_folios; i++) {
+ struct folio *folio = wc->w_folios[i];
- /* This is the direct io target page. */
- if (tmppage == NULL)
+ /* This is the direct io target folio */
+ if (folio == NULL)
continue;
- if (tmppage == wc->w_target_page) {
+ if (folio == wc->w_target_folio) {
from = wc->w_target_from;
to = wc->w_target_to;
- BUG_ON(from > PAGE_SIZE ||
- to > PAGE_SIZE ||
+ BUG_ON(from > folio_size(folio) ||
+ to > folio_size(folio) ||
to < from);
} else {
/*
@@ -2022,19 +1997,17 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
* to flush their entire range.
*/
from = 0;
- to = PAGE_SIZE;
+ to = folio_size(folio);
}
- if (page_has_buffers(tmppage)) {
+ if (folio_buffers(folio)) {
if (handle && ocfs2_should_order_data(inode)) {
- loff_t start_byte =
- ((loff_t)tmppage->index << PAGE_SHIFT) +
- from;
+ loff_t start_byte = folio_pos(folio) + from;
loff_t length = to - from;
ocfs2_jbd2_inode_add_write(handle, inode,
start_byte, length);
}
- block_commit_write(tmppage, from, to);
+ block_commit_write(folio, from, to);
}
}
@@ -2048,9 +2021,9 @@ out_write_size:
}
inode->i_blocks = ocfs2_inode_sector_count(inode);
di->i_size = cpu_to_le64((u64)i_size_read(inode));
- inode->i_mtime = inode_set_ctime_current(inode);
- di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
- di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+ di->i_mtime = di->i_ctime = cpu_to_le64(inode_get_mtime_sec(inode));
+ di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
if (handle)
ocfs2_update_inode_fsync_trans(handle, inode, 1);
}
@@ -2063,7 +2036,7 @@ out:
* this lock and will ask for the page lock when flushing the data.
* put it here to preserve the unlock order.
*/
- ocfs2_unlock_pages(wc);
+ ocfs2_unlock_folios(wc);
if (handle)
ocfs2_commit_trans(osb, handle);
@@ -2076,9 +2049,10 @@ out:
return copied;
}
-static int ocfs2_write_end(struct file *file, struct address_space *mapping,
+static int ocfs2_write_end(const struct kiocb *iocb,
+ struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
int ret;
struct inode *inode = mapping->host;
@@ -2285,8 +2259,6 @@ unlock:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
out:
- if (ret < 0)
- ret = -EIO;
return ret;
}
@@ -2370,6 +2342,11 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
}
list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
+ ret = ocfs2_assure_trans_credits(handle, credits);
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
ret = ocfs2_mark_extent_written(inode, &et, handle,
ue->ue_cpos, 1,
ue->ue_phys,
@@ -2470,7 +2447,7 @@ const struct address_space_operations ocfs2_aops = {
.dirty_folio = block_dirty_folio,
.read_folio = ocfs2_read_folio,
.readahead = ocfs2_readahead,
- .writepage = ocfs2_writepage,
+ .writepages = ocfs2_writepages,
.write_begin = ocfs2_write_begin,
.write_end = ocfs2_write_end,
.bmap = ocfs2_bmap,
@@ -2479,5 +2456,5 @@ const struct address_space_operations ocfs2_aops = {
.release_folio = ocfs2_release_folio,
.migrate_folio = buffer_migrate_folio,
.is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
+ .error_remove_folio = generic_error_remove_folio,
};
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 3a520117fa59..114efc9111e4 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -8,16 +8,11 @@
#include <linux/fs.h>
-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
- struct page *page,
- unsigned from,
- unsigned to);
-
-int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
+int ocfs2_map_folio_blocks(struct folio *folio, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new);
-void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages);
+void ocfs2_unlock_and_free_folios(struct folio **folios, int num_folios);
int walk_page_buffers( handle_t *handle,
struct buffer_head *head,
@@ -37,11 +32,11 @@ typedef enum {
} ocfs2_write_type_t;
int ocfs2_write_begin_nolock(struct address_space *mapping,
- loff_t pos, unsigned len, ocfs2_write_type_t type,
- struct page **pagep, void **fsdata,
- struct buffer_head *di_bh, struct page *mmap_page);
+ loff_t pos, unsigned len, ocfs2_write_type_t type,
+ struct folio **foliop, void **fsdata,
+ struct buffer_head *di_bh, struct folio *mmap_folio);
-int ocfs2_read_inline_data(struct inode *inode, struct page *page,
+int ocfs2_read_inline_data(struct inode *inode, struct folio *folio,
struct buffer_head *di_bh);
int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
@@ -70,6 +65,8 @@ enum ocfs2_iocb_lock_bits {
OCFS2_IOCB_NUM_LOCKS
};
+#define ocfs2_iocb_init_rw_locked(iocb) \
+ (iocb->private = NULL)
#define ocfs2_iocb_clear_rw_locked(iocb) \
clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
#define ocfs2_iocb_rw_locked_level(iocb) \
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 196638a22b48..8f714406528d 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -158,7 +158,7 @@ read_failure:
if (new_bh && bh) {
/* If middle bh fails, let previous bh
* finish its read and then put it to
- * aovoid bh leak
+ * avoid bh leak
*/
if (!buffer_jbd(bh))
wait_on_buffer(bh);
@@ -235,7 +235,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(sb, block++);
if (bhs[i] == NULL) {
- ocfs2_metadata_cache_io_unlock(ci);
status = -ENOMEM;
mlog_errno(status);
/* Don't forget to put previous bh! */
@@ -345,7 +344,7 @@ read_failure:
if (new_bh && bh) {
/* If middle bh fails, let previous bh
* finish its read and then put it to
- * aovoid bh leak
+ * avoid bh leak
*/
if (!buffer_jbd(bh))
wait_on_buffer(bh);
@@ -389,7 +388,8 @@ read_failure:
/* Always set the buffer in the cache, even if it was
* a forced read, or read-ahead which hasn't yet
* completed. */
- ocfs2_set_buffer_uptodate(ci, bh);
+ if (bh)
+ ocfs2_set_buffer_uptodate(ci, bh);
}
ocfs2_metadata_cache_io_unlock(ci);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 21472e3ed182..724350925aff 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -3,6 +3,7 @@
* Copyright (C) 2004, 2005 Oracle. All rights reserved.
*/
+#include "linux/kstrtox.h"
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/jiffies.h>
@@ -213,7 +214,7 @@ struct o2hb_region {
unsigned int hr_num_pages;
struct page **hr_slot_data;
- struct block_device *hr_bdev;
+ struct file *hr_bdev_file;
struct o2hb_disk_slot *hr_slots;
/* live node map of this region */
@@ -261,6 +262,11 @@ struct o2hb_region {
int hr_last_hb_status;
};
+static inline struct block_device *reg_bdev(struct o2hb_region *reg)
+{
+ return reg->hr_bdev_file ? file_bdev(reg->hr_bdev_file) : NULL;
+}
+
struct o2hb_bio_wait_ctxt {
atomic_t wc_num_reqs;
struct completion wc_io_complete;
@@ -286,7 +292,7 @@ static void o2hb_write_timeout(struct work_struct *work)
hr_write_timeout_work.work);
mlog(ML_ERROR, "Heartbeat write timeout to device %pg after %u "
- "milliseconds\n", reg->hr_bdev,
+ "milliseconds\n", reg_bdev(reg),
jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
if (o2hb_global_heartbeat_active()) {
@@ -383,7 +389,7 @@ static void o2hb_nego_timeout(struct work_struct *work)
if (!test_bit(master_node, reg->hr_nego_node_bitmap)) {
printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%pg).\n",
o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000,
- config_item_name(&reg->hr_item), reg->hr_bdev);
+ config_item_name(&reg->hr_item), reg_bdev(reg));
set_bit(master_node, reg->hr_nego_node_bitmap);
}
if (!bitmap_equal(reg->hr_nego_node_bitmap, live_node_bitmap,
@@ -398,7 +404,8 @@ static void o2hb_nego_timeout(struct work_struct *work)
}
printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%pg) is down.\n",
- config_item_name(&reg->hr_item), reg->hr_bdev);
+ config_item_name(&reg->hr_item),
+ reg_bdev(reg));
/* approve negotiate timeout request. */
o2hb_arm_timeout(reg);
@@ -419,7 +426,7 @@ static void o2hb_nego_timeout(struct work_struct *work)
/* negotiate timeout with master node. */
printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%pg), negotiate timeout with node %d.\n",
o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, config_item_name(&reg->hr_item),
- reg->hr_bdev, master_node);
+ reg_bdev(reg), master_node);
ret = o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG,
master_node);
if (ret)
@@ -436,7 +443,8 @@ static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data,
nego_msg = (struct o2hb_nego_msg *)msg->buf;
printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%pg).\n",
- nego_msg->node_num, config_item_name(&reg->hr_item), reg->hr_bdev);
+ nego_msg->node_num, config_item_name(&reg->hr_item),
+ reg_bdev(reg));
if (nego_msg->node_num < O2NM_MAX_NODES)
set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap);
else
@@ -451,7 +459,7 @@ static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data,
struct o2hb_region *reg = data;
printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%pg).\n",
- config_item_name(&reg->hr_item), reg->hr_bdev);
+ config_item_name(&reg->hr_item), reg_bdev(reg));
o2hb_arm_timeout(reg);
return 0;
}
@@ -515,7 +523,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
* GFP_KERNEL that the local node can get fenced. It would be
* nicest if we could pre-allocate these bios and avoid this
* all together. */
- bio = bio_alloc(reg->hr_bdev, 16, opf, GFP_ATOMIC);
+ bio = bio_alloc(reg_bdev(reg), 16, opf, GFP_ATOMIC);
if (!bio) {
mlog(ML_ERROR, "Could not alloc slots BIO!\n");
bio = ERR_PTR(-ENOMEM);
@@ -687,7 +695,7 @@ static int o2hb_check_own_slot(struct o2hb_region *reg)
errstr = ERRSTR3;
mlog(ML_ERROR, "%s (%pg): expected(%u:0x%llx, 0x%llx), "
- "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_bdev,
+ "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg_bdev(reg),
slot->ds_node_num, (unsigned long long)slot->ds_last_generation,
(unsigned long long)slot->ds_last_time, hb_block->hb_node,
(unsigned long long)le64_to_cpu(hb_block->hb_generation),
@@ -861,7 +869,7 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg)
goto unlock;
printk(KERN_NOTICE "o2hb: Region %s (%pg) is now a quorum device\n",
- config_item_name(&reg->hr_item), reg->hr_bdev);
+ config_item_name(&reg->hr_item), reg_bdev(reg));
set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
@@ -920,7 +928,7 @@ static int o2hb_check_slot(struct o2hb_region *reg,
* consider it a transient miss but don't populate any
* other values as they may be junk. */
mlog(ML_ERROR, "Node %d has written a bad crc to %pg\n",
- slot->ds_node_num, reg->hr_bdev);
+ slot->ds_node_num, reg_bdev(reg));
o2hb_dump_slot(hb_block);
slot->ds_equal_samples++;
@@ -1003,8 +1011,8 @@ fire_callbacks:
"of %u ms, but our count is %u ms.\n"
"Please double check your configuration values "
"for 'O2CB_HEARTBEAT_THRESHOLD'\n",
- slot->ds_node_num, reg->hr_bdev, slot_dead_ms,
- dead_ms);
+ slot->ds_node_num, reg_bdev(reg),
+ slot_dead_ms, dead_ms);
}
goto out;
}
@@ -1013,7 +1021,7 @@ fire_callbacks:
if (list_empty(&slot->ds_live_item))
goto out;
- /* live nodes only go dead after enough consequtive missed
+ /* live nodes only go dead after enough consecutive missed
* samples.. reset the missed counter whenever we see
* activity */
if (slot->ds_equal_samples >= o2hb_dead_threshold || gen_changed) {
@@ -1143,7 +1151,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
* can't be sure that the new block ever made it to
* disk */
mlog(ML_ERROR, "Write error %d on device \"%pg\"\n",
- write_wc.wc_error, reg->hr_bdev);
+ write_wc.wc_error, reg_bdev(reg));
ret = write_wc.wc_error;
goto bail;
}
@@ -1169,7 +1177,7 @@ bail:
printk(KERN_NOTICE "o2hb: Unable to stabilize "
"heartbeat on region %s (%pg)\n",
config_item_name(&reg->hr_item),
- reg->hr_bdev);
+ reg_bdev(reg));
atomic_set(&reg->hr_steady_iterations, 0);
reg->hr_aborted_start = 1;
wake_up(&o2hb_steady_queue);
@@ -1489,7 +1497,7 @@ static void o2hb_region_release(struct config_item *item)
struct page *page;
struct o2hb_region *reg = to_o2hb_region(item);
- mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg->hr_bdev);
+ mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg_bdev(reg));
kfree(reg->hr_tmp_block);
@@ -1502,8 +1510,8 @@ static void o2hb_region_release(struct config_item *item)
kfree(reg->hr_slot_data);
}
- if (reg->hr_bdev)
- blkdev_put(reg->hr_bdev, NULL);
+ if (reg->hr_bdev_file)
+ fput(reg->hr_bdev_file);
kfree(reg->hr_slots);
@@ -1528,10 +1536,11 @@ static int o2hb_read_block_input(struct o2hb_region *reg,
{
unsigned long bytes;
char *p = (char *)page;
+ int ret;
- bytes = simple_strtoul(p, &p, 0);
- if (!p || (*p && (*p != '\n')))
- return -EINVAL;
+ ret = kstrtoul(p, 0, &bytes);
+ if (ret)
+ return ret;
/* Heartbeat and fs min / max block sizes are the same. */
if (bytes > 4096 || bytes < 512)
@@ -1562,7 +1571,7 @@ static ssize_t o2hb_region_block_bytes_store(struct config_item *item,
unsigned long block_bytes;
unsigned int block_bits;
- if (reg->hr_bdev)
+ if (reg->hr_bdev_file)
return -EINVAL;
status = o2hb_read_block_input(reg, page, &block_bytes,
@@ -1591,7 +1600,7 @@ static ssize_t o2hb_region_start_block_store(struct config_item *item,
char *p = (char *)page;
ssize_t ret;
- if (reg->hr_bdev)
+ if (reg->hr_bdev_file)
return -EINVAL;
ret = kstrtoull(p, 0, &tmp);
@@ -1615,13 +1624,14 @@ static ssize_t o2hb_region_blocks_store(struct config_item *item,
struct o2hb_region *reg = to_o2hb_region(item);
unsigned long tmp;
char *p = (char *)page;
+ int ret;
- if (reg->hr_bdev)
+ if (reg->hr_bdev_file)
return -EINVAL;
- tmp = simple_strtoul(p, &p, 0);
- if (!p || (*p && (*p != '\n')))
- return -EINVAL;
+ ret = kstrtoul(p, 0, &tmp);
+ if (ret)
+ return ret;
if (tmp > O2NM_MAX_NODES || tmp == 0)
return -ERANGE;
@@ -1635,8 +1645,8 @@ static ssize_t o2hb_region_dev_show(struct config_item *item, char *page)
{
unsigned int ret = 0;
- if (to_o2hb_region(item)->hr_bdev)
- ret = sprintf(page, "%pg\n", to_o2hb_region(item)->hr_bdev);
+ if (to_o2hb_region(item)->hr_bdev_file)
+ ret = sprintf(page, "%pg\n", reg_bdev(to_o2hb_region(item)));
return ret;
}
@@ -1745,7 +1755,10 @@ out:
return ret;
}
-/* this is acting as commit; we set up all of hr_bdev and hr_task or nothing */
+/*
+ * this is acting as commit; we set up all of hr_bdev_file and hr_task or
+ * nothing
+ */
static ssize_t o2hb_region_dev_store(struct config_item *item,
const char *page,
size_t count)
@@ -1755,46 +1768,44 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
long fd;
int sectsize;
char *p = (char *)page;
- struct fd f;
ssize_t ret = -EINVAL;
int live_threshold;
- if (reg->hr_bdev)
- goto out;
+ if (reg->hr_bdev_file)
+ return -EINVAL;
/* We can't heartbeat without having had our node number
* configured yet. */
if (o2nm_this_node() == O2NM_MAX_NODES)
- goto out;
+ return -EINVAL;
- fd = simple_strtol(p, &p, 0);
- if (!p || (*p && (*p != '\n')))
- goto out;
+ ret = kstrtol(p, 0, &fd);
+ if (ret < 0)
+ return -EINVAL;
if (fd < 0 || fd >= INT_MAX)
- goto out;
+ return -EINVAL;
- f = fdget(fd);
- if (f.file == NULL)
- goto out;
+ CLASS(fd, f)(fd);
+ if (fd_empty(f))
+ return -EINVAL;
if (reg->hr_blocks == 0 || reg->hr_start_block == 0 ||
reg->hr_block_bytes == 0)
- goto out2;
+ return -EINVAL;
- if (!S_ISBLK(f.file->f_mapping->host->i_mode))
- goto out2;
+ if (!S_ISBLK(fd_file(f)->f_mapping->host->i_mode))
+ return -EINVAL;
- reg->hr_bdev = blkdev_get_by_dev(f.file->f_mapping->host->i_rdev,
- BLK_OPEN_WRITE | BLK_OPEN_READ, NULL,
- NULL);
- if (IS_ERR(reg->hr_bdev)) {
- ret = PTR_ERR(reg->hr_bdev);
- reg->hr_bdev = NULL;
- goto out2;
+ reg->hr_bdev_file = bdev_file_open_by_dev(fd_file(f)->f_mapping->host->i_rdev,
+ BLK_OPEN_WRITE | BLK_OPEN_READ, NULL, NULL);
+ if (IS_ERR(reg->hr_bdev_file)) {
+ ret = PTR_ERR(reg->hr_bdev_file);
+ reg->hr_bdev_file = NULL;
+ return ret;
}
- sectsize = bdev_logical_block_size(reg->hr_bdev);
+ sectsize = bdev_logical_block_size(reg_bdev(reg));
if (sectsize != reg->hr_block_bytes) {
mlog(ML_ERROR,
"blocksize %u incorrect for device, expected %d",
@@ -1890,16 +1901,13 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
if (hb_task && o2hb_global_heartbeat_active())
printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%pg)\n",
- config_item_name(&reg->hr_item), reg->hr_bdev);
+ config_item_name(&reg->hr_item), reg_bdev(reg));
out3:
if (ret < 0) {
- blkdev_put(reg->hr_bdev, NULL);
- reg->hr_bdev = NULL;
+ fput(reg->hr_bdev_file);
+ reg->hr_bdev_file = NULL;
}
-out2:
- fdput(f);
-out:
return ret;
}
@@ -2085,7 +2093,7 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
printk(KERN_NOTICE "o2hb: Heartbeat %s on region %s (%pg)\n",
((atomic_read(&reg->hr_steady_iterations) == 0) ?
"stopped" : "start aborted"), config_item_name(item),
- reg->hr_bdev);
+ reg_bdev(reg));
}
/*
@@ -2131,10 +2139,11 @@ static ssize_t o2hb_heartbeat_group_dead_threshold_store(struct config_item *ite
{
unsigned long tmp;
char *p = (char *)page;
+ int ret;
- tmp = simple_strtoul(p, &p, 10);
- if (!p || (*p && (*p != '\n')))
- return -EINVAL;
+ ret = kstrtoul(p, 10, &tmp);
+ if (ret)
+ return ret;
/* this will validate ranges for us. */
o2hb_dead_threshold_set((unsigned int) tmp);
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index b73fc42e46ff..630bd5a3dd0d 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -29,7 +29,7 @@
* just calling printk() so that this can eventually make its way through
* relayfs along with the debugging messages. Everything else gets KERN_DEBUG.
* The inline tests and macro dance give GCC the opportunity to quite cleverly
- * only emit the appropriage printk() when the caller passes in a constant
+ * only emit the appropriate printk() when the caller passes in a constant
* mask, as is almost always the case.
*
* All this bitmask nonsense is managed from the files under
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 15d0ed9c13e5..bfb8b456876c 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -23,7 +23,7 @@
* race between when we see a node start heartbeating and when we connect
* to it.
*
- * So nodes that are in this transtion put a hold on the quorum decision
+ * So nodes that are in this transition put a hold on the quorum decision
* with a counter. As they fall out of this transition they drop the count
* and if they're the last, they fire off the decision.
*/
@@ -60,7 +60,7 @@ static void o2quo_fence_self(void)
switch (o2nm_single_cluster->cl_fence_method) {
case O2NM_FENCE_PANIC:
panic("*** ocfs2 is very sorry to be fencing this system by "
- "panicing ***\n");
+ "panicking ***\n");
break;
default:
WARN_ON(o2nm_single_cluster->cl_fence_method >=
@@ -189,7 +189,7 @@ static void o2quo_clear_hold(struct o2quo_state *qs, u8 node)
}
/* as a node comes up we delay the quorum decision until we know the fate of
- * the connection. the hold will be droped in conn_up or hb_down. it might be
+ * the connection. the hold will be dropped in conn_up or hb_down. it might be
* perpetuated by con_err until hb_down. if we already have a conn, we might
* be dropping a hold that conn_up got. */
void o2quo_hb_up(u8 node)
@@ -256,7 +256,7 @@ void o2quo_hb_still_up(u8 node)
}
/* This is analogous to hb_up. as a node's connection comes up we delay the
- * quorum decision until we see it heartbeating. the hold will be droped in
+ * quorum decision until we see it heartbeating. the hold will be dropped in
* hb_up or hb_down. it might be perpetuated by con_err until hb_down. if
* it's already heartbeating we might be dropping a hold that conn_up got.
* */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 960080753d3b..79b281e32f4c 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -5,13 +5,13 @@
*
* ----
*
- * Callers for this were originally written against a very simple synchronus
+ * Callers for this were originally written against a very simple synchronous
* API. This implementation reflects those simple callers. Some day I'm sure
* we'll need to move to a more robust posting/callback mechanism.
*
* Transmit calls pass in kernel virtual addresses and block copying this into
* the socket's tx buffers via a usual blocking sendmsg. They'll block waiting
- * for a failed socket to timeout. TX callers can also pass in a poniter to an
+ * for a failed socket to timeout. TX callers can also pass in a pointer to an
* 'int' which gets filled with an errno off the wire in response to the
* message they send.
*
@@ -101,7 +101,7 @@ static struct socket *o2net_listen_sock;
* o2net_wq. teardown detaches the callbacks before destroying the workqueue.
* quorum work is queued as sock containers are shutdown.. stop_listening
* tears down all the node's sock containers, preventing future shutdowns
- * and queued quroum work, before canceling delayed quorum work and
+ * and queued quorum work, before canceling delayed quorum work and
* destroying the work queue.
*/
static struct workqueue_struct *o2net_wq;
@@ -724,7 +724,7 @@ static void o2net_shutdown_sc(struct work_struct *work)
if (o2net_unregister_callbacks(sc->sc_sock->sk, sc)) {
/* we shouldn't flush as we're in the thread, the
* races with pending sc work structs are harmless */
- del_timer_sync(&sc->sc_idle_timeout);
+ timer_delete_sync(&sc->sc_idle_timeout);
o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
sc_put(sc);
kernel_sock_shutdown(sc->sc_sock, SHUT_RDWR);
@@ -1419,7 +1419,7 @@ out:
return ret;
}
-/* this work func is triggerd by data ready. it reads until it can read no
+/* this work func is triggered by data ready. it reads until it can read no
* more. it interprets 0, eof, as fatal. if data_ready hits while we're doing
* our work the work struct will be marked and we'll be called again. */
static void o2net_rx_until_empty(struct work_struct *work)
@@ -1483,12 +1483,13 @@ static void o2net_sc_send_keep_req(struct work_struct *work)
sc_put(sc);
}
-/* socket shutdown does a del_timer_sync against this as it tears down.
+/* socket shutdown does a timer_delete_sync against this as it tears down.
* we can't start this timer until we've got to the point in sc buildup
* where shutdown is going to be involved */
static void o2net_idle_timer(struct timer_list *t)
{
- struct o2net_sock_container *sc = from_timer(sc, t, sc_idle_timeout);
+ struct o2net_sock_container *sc = timer_container_of(sc, t,
+ sc_idle_timeout);
struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
#ifdef CONFIG_DEBUG_FS
unsigned long msecs = ktime_to_ms(ktime_get()) -
@@ -1614,7 +1615,7 @@ static void o2net_start_connect(struct work_struct *work)
myaddr.sin_addr.s_addr = mynode->nd_ipv4_address;
myaddr.sin_port = htons(0); /* any port */
- ret = sock->ops->bind(sock, (struct sockaddr *)&myaddr,
+ ret = sock->ops->bind(sock, (struct sockaddr_unsized *)&myaddr,
sizeof(myaddr));
if (ret) {
mlog(ML_ERROR, "bind failed with %d at address %pI4\n",
@@ -1637,7 +1638,7 @@ static void o2net_start_connect(struct work_struct *work)
remoteaddr.sin_port = node->nd_ipv4_port;
ret = sc->sc_sock->ops->connect(sc->sc_sock,
- (struct sockaddr *)&remoteaddr,
+ (struct sockaddr_unsized *)&remoteaddr,
sizeof(remoteaddr),
O_NONBLOCK);
if (ret == -EINPROGRESS)
@@ -1784,6 +1785,9 @@ static int o2net_accept_one(struct socket *sock, int *more)
struct o2nm_node *node = NULL;
struct o2nm_node *local_node = NULL;
struct o2net_sock_container *sc = NULL;
+ struct proto_accept_arg arg = {
+ .flags = O_NONBLOCK,
+ };
struct o2net_node *nn;
unsigned int nofs_flag;
@@ -1802,7 +1806,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
new_sock->type = sock->type;
new_sock->ops = sock->ops;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
+ ret = sock->ops->accept(sock, new_sock, &arg);
if (ret < 0)
goto out;
@@ -1998,7 +2002,7 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port)
INIT_WORK(&o2net_listen_work, o2net_accept_many);
sock->sk->sk_reuse = SK_CAN_REUSE;
- ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+ ret = sock->ops->bind(sock, (struct sockaddr_unsized *)&sin, sizeof(sin));
if (ret < 0) {
printk(KERN_ERR "o2net: Error %d while binding socket at "
"%pI4:%u\n", ret, &addr, ntohs(port));
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 04fc8344063a..1873bbbb7e5b 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -32,7 +32,8 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
}
-static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+static int ocfs2_dentry_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
int ret = 0; /* if all else fails, just return false */
@@ -44,8 +45,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
inode = d_inode(dentry);
osb = OCFS2_SB(dentry->d_sb);
- trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len,
- dentry->d_name.name);
+ trace_ocfs2_dentry_revalidate(dentry, name->len, name->name);
/* For a negative dentry -
* check the generation number of the parent and compare with the
@@ -53,12 +53,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
*/
if (inode == NULL) {
unsigned long gen = (unsigned long) dentry->d_fsdata;
- unsigned long pgen;
- spin_lock(&dentry->d_lock);
- pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen;
- spin_unlock(&dentry->d_lock);
- trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
- dentry->d_name.name,
+ unsigned long pgen = OCFS2_I(dir)->ip_dir_lock_gen;
+ trace_ocfs2_dentry_revalidate_negative(name->len, name->name,
pgen, gen);
if (gen != pgen)
goto bail;
@@ -124,17 +120,10 @@ static int ocfs2_match_dentry(struct dentry *dentry,
if (!dentry->d_fsdata)
return 0;
- if (!dentry->d_parent)
- return 0;
-
if (skip_unhashed && d_unhashed(dentry))
return 0;
parent = d_inode(dentry->d_parent);
- /* Negative parent dentry? */
- if (!parent)
- return 0;
-
/* Name is in a different directory. */
if (OCFS2_I(parent)->ip_blkno != parent_blkno)
return 0;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 8b123d543e6e..2785ff245e79 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -294,13 +294,29 @@ out:
* bh passed here can be an inode block or a dir data block, depending
* on the inode inline data flag.
*/
-static int ocfs2_check_dir_entry(struct inode * dir,
- struct ocfs2_dir_entry * de,
- struct buffer_head * bh,
+static int ocfs2_check_dir_entry(struct inode *dir,
+ struct ocfs2_dir_entry *de,
+ struct buffer_head *bh,
+ char *buf,
+ unsigned int size,
unsigned long offset)
{
const char *error_msg = NULL;
- const int rlen = le16_to_cpu(de->rec_len);
+ unsigned long next_offset;
+ int rlen;
+
+ if (offset > size - OCFS2_DIR_REC_LEN(1)) {
+ /* Dirent is (maybe partially) beyond the buffer
+ * boundaries so touching 'de' members is unsafe.
+ */
+ mlog(ML_ERROR, "directory entry (#%llu: offset=%lu) "
+ "too close to end or out-of-bounds",
+ (unsigned long long)OCFS2_I(dir)->ip_blkno, offset);
+ return 0;
+ }
+
+ rlen = le16_to_cpu(de->rec_len);
+ next_offset = ((char *) de - buf) + rlen;
if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
@@ -308,9 +324,11 @@ static int ocfs2_check_dir_entry(struct inode * dir,
error_msg = "rec_len % 4 != 0";
else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
- else if (unlikely(
- ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
- error_msg = "directory entry across blocks";
+ else if (unlikely(next_offset > size))
+ error_msg = "directory entry overrun";
+ else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
+ next_offset != size)
+ error_msg = "directory entry too close to end";
if (unlikely(error_msg != NULL))
mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
@@ -352,16 +370,17 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
de_buf = first_de;
dlimit = de_buf + bytes;
- while (de_buf < dlimit) {
+ while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
de = (struct ocfs2_dir_entry *) de_buf;
- if (de_buf + namelen <= dlimit &&
+ if (de->name + namelen <= dlimit &&
ocfs2_match(namelen, name, de)) {
/* found a match - just to be sure, do a full check */
- if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, first_de,
+ bytes, offset)) {
ret = -1;
goto bail;
}
@@ -772,6 +791,14 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
struct ocfs2_extent_block *eb;
struct ocfs2_extent_rec *rec = NULL;
+ if (le16_to_cpu(el->l_count) !=
+ ocfs2_extent_recs_per_dx_root(inode->i_sb)) {
+ ret = ocfs2_error(inode->i_sb,
+ "Inode %lu has invalid extent list length %u\n",
+ inode->i_ino, le16_to_cpu(el->l_count));
+ goto out;
+ }
+
if (el->l_tree_depth) {
ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash,
&eb_bh);
@@ -792,6 +819,14 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
}
}
+ if (le16_to_cpu(el->l_next_free_rec) == 0) {
+ ret = ocfs2_error(inode->i_sb,
+ "Inode %lu has empty extent list at depth %u\n",
+ inode->i_ino,
+ le16_to_cpu(el->l_tree_depth));
+ goto out;
+ }
+
found = 0;
for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
rec = &el->l_recs[i];
@@ -1059,26 +1094,39 @@ int ocfs2_find_entry(const char *name, int namelen,
{
struct buffer_head *bh;
struct ocfs2_dir_entry *res_dir = NULL;
+ int ret = 0;
if (ocfs2_dir_indexed(dir))
return ocfs2_find_entry_dx(name, namelen, dir, lookup);
+ if (unlikely(i_size_read(dir) <= 0)) {
+ ret = -EFSCORRUPTED;
+ mlog_errno(ret);
+ goto out;
+ }
/*
* The unindexed dir code only uses part of the lookup
* structure, so there's no reason to push it down further
* than this.
*/
- if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
+ if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ if (unlikely(i_size_read(dir) > dir->i_sb->s_blocksize)) {
+ ret = -EFSCORRUPTED;
+ mlog_errno(ret);
+ goto out;
+ }
bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir);
- else
+ } else {
bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir);
+ }
if (bh == NULL)
return -ENOENT;
lookup->dl_leaf_bh = bh;
lookup->dl_entry = res_dir;
- return 0;
+out:
+ return ret;
}
/*
@@ -1138,7 +1186,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
pde = NULL;
de = (struct ocfs2_dir_entry *) first_de;
while (i < bytes) {
- if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
status = -EIO;
mlog_errno(status);
goto bail;
@@ -1593,9 +1641,6 @@ int __ocfs2_add_entry(handle_t *handle,
struct buffer_head *insert_bh = lookup->dl_leaf_bh;
char *data_start = insert_bh->b_data;
- if (!namelen)
- return -EINVAL;
-
if (ocfs2_dir_indexed(dir)) {
struct buffer_head *bh;
@@ -1638,7 +1683,8 @@ int __ocfs2_add_entry(handle_t *handle,
/* These checks should've already been passed by the
* prepare function, but I guess we can leave them
* here anyway. */
- if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start,
+ size, offset)) {
retval = -ENOENT;
goto bail;
}
@@ -1658,7 +1704,8 @@ int __ocfs2_add_entry(handle_t *handle,
offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
if (ocfs2_dirent_would_fit(de, rec_len)) {
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir,
+ inode_set_ctime_current(dir));
retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
if (retval < 0) {
mlog_errno(retval);
@@ -1776,7 +1823,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
}
de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
- if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
+ if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data,
+ i_size_read(inode), ctx->pos)) {
/* On error, skip the f_pos to the end. */
ctx->pos = i_size_read(inode);
break;
@@ -1869,7 +1917,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
while (ctx->pos < i_size_read(inode)
&& offset < sb->s_blocksize) {
de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
- if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
+ sb->s_blocksize, offset)) {
/* On error, skip the f_pos to the
next block. */
ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
@@ -1925,6 +1974,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
{
int error = 0;
struct inode *inode = file_inode(file);
+ struct ocfs2_file_private *fp = file->private_data;
int lock_level = 0;
trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -1945,7 +1995,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
goto bail_nolock;
}
- error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false);
+ error = ocfs2_dir_foreach_blk(inode, &fp->cookie, ctx, false);
ocfs2_inode_unlock(inode, lock_level);
if (error)
@@ -2002,6 +2052,7 @@ int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
*
* Return 0 if the name does not exist
* Return -EEXIST if the directory contains the name
+ * Return -EFSCORRUPTED if found corruption
*
* Callers should have i_rwsem + a cluster lock on dir
*/
@@ -2015,9 +2066,12 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
trace_ocfs2_check_dir_for_entry(
(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
- if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) {
+ ret = ocfs2_find_entry(name, namelen, dir, &lookup);
+ if (ret == 0) {
ret = -EEXIST;
mlog_errno(ret);
+ } else if (ret == -ENOENT) {
+ ret = 0;
}
ocfs2_free_dir_lookup_result(&lookup);
@@ -2962,11 +3016,11 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
ocfs2_dinode_new_extent_list(dir, di);
i_size_write(dir, sb->s_blocksize);
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
di->i_size = cpu_to_le64(sb->s_blocksize);
- di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(dir).tv_sec);
- di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(dir).tv_nsec);
+ di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime_sec(dir));
+ di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime_nsec(dir));
ocfs2_update_inode_fsync_trans(handle, dir, 1);
/*
@@ -3341,7 +3395,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
struct super_block *sb = dir->i_sb;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_dir_entry *de, *last_de = NULL;
- char *de_buf, *limit;
+ char *first_de, *de_buf, *limit;
unsigned long offset = 0;
unsigned int rec_len, new_rec_len, free_space;
@@ -3354,14 +3408,16 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
else
free_space = dir->i_sb->s_blocksize - i_size_read(dir);
- de_buf = di->id2.i_data.id_data;
+ first_de = di->id2.i_data.id_data;
+ de_buf = first_de;
limit = de_buf + i_size_read(dir);
rec_len = OCFS2_DIR_REC_LEN(namelen);
while (de_buf < limit) {
de = (struct ocfs2_dir_entry *)de_buf;
- if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de,
+ i_size_read(dir), offset)) {
ret = -ENOENT;
goto out;
}
@@ -3388,6 +3444,14 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
offset += le16_to_cpu(de->rec_len);
}
+ if (!last_de) {
+ ret = ocfs2_error(sb, "Directory entry (#%llu: size=%lld) "
+ "is unexpectedly short",
+ (unsigned long long)OCFS2_I(dir)->ip_blkno,
+ i_size_read(dir));
+ goto out;
+ }
+
/*
* We're going to require expansion of the directory - figure
* out how many blocks we'll need so that a place for the
@@ -3443,7 +3507,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
/* move to next block */
de = (struct ocfs2_dir_entry *) bh->b_data;
}
- if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize,
+ offset)) {
status = -ENOENT;
goto bail;
}
@@ -3501,16 +3566,6 @@ static int dx_leaf_sort_cmp(const void *a, const void *b)
return 0;
}
-static void dx_leaf_sort_swap(void *a, void *b, int size)
-{
- struct ocfs2_dx_entry *entry1 = a;
- struct ocfs2_dx_entry *entry2 = b;
-
- BUG_ON(size != sizeof(*entry1));
-
- swap(*entry1, *entry2);
-}
-
static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf)
{
struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
@@ -3771,7 +3826,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
*/
sort(dx_leaf->dl_list.de_entries, num_used,
sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
- dx_leaf_sort_swap);
+ NULL);
ocfs2_journal_dirty(handle, dx_leaf_bh);
@@ -4078,10 +4133,15 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
}
dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
- memset(&dx_root->dr_list, 0, osb->sb->s_blocksize -
- offsetof(struct ocfs2_dx_root_block, dr_list));
+
+ dx_root->dr_list.l_tree_depth = 0;
dx_root->dr_list.l_count =
cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
+ dx_root->dr_list.l_next_free_rec = 0;
+ memset(&dx_root->dr_list.l_recs, 0,
+ osb->sb->s_blocksize -
+ (offsetof(struct ocfs2_dx_root_block, dr_list) +
+ offsetof(struct ocfs2_extent_list, l_recs)));
/* This should never fail considering we start with an empty
* dx_root. */
@@ -4244,12 +4304,6 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
trace_ocfs2_prepare_dir_for_insert(
(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen);
- if (!namelen) {
- ret = -EINVAL;
- mlog_errno(ret);
- goto out;
- }
-
/*
* Do this up front to reduce confusion.
*
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index bae60ca2672a..1969db8ffa9c 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -62,8 +62,6 @@ enum dlm_status {
DLM_MAXSTATS, /* 41: upper limit for return code validation */
};
-/* for pretty-printing dlm_status error messages */
-const char *dlm_errmsg(enum dlm_status err);
/* for pretty-printing dlm_status error names */
const char *dlm_errname(enum dlm_status err);
@@ -120,7 +118,7 @@ struct dlm_lockstatus {
#define LKM_VALBLK 0x00000100 /* lock value block request */
#define LKM_NOQUEUE 0x00000200 /* non blocking request */
#define LKM_CONVERT 0x00000400 /* conversion request */
-#define LKM_NODLCKWT 0x00000800 /* this lock wont deadlock (U) */
+#define LKM_NODLCKWT 0x00000800 /* this lock won't deadlock (U) */
#define LKM_UNLOCK 0x00001000 /* deallocate this lock */
#define LKM_CANCEL 0x00002000 /* cancel conversion request */
#define LKM_DEQALL 0x00004000 /* remove all locks held by proc (U) */
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index be5e9ed7da8d..fe4fdd09bae3 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/export.h>
+#include <linux/string_choices.h>
#include "../cluster/heartbeat.h"
#include "../cluster/nodemanager.h"
@@ -90,12 +91,12 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
buf, res->owner, res->state);
printk(" last used: %lu, refcnt: %u, on purge list: %s\n",
res->last_used, kref_read(&res->refs),
- list_empty(&res->purge) ? "no" : "yes");
+ str_no_yes(list_empty(&res->purge)));
printk(" on dirty list: %s, on reco list: %s, "
"migrating pending: %s\n",
- list_empty(&res->dirty) ? "no" : "yes",
- list_empty(&res->recovering) ? "no" : "yes",
- res->migration_pending ? "yes" : "no");
+ str_no_yes(list_empty(&res->dirty)),
+ str_no_yes(list_empty(&res->recovering)),
+ str_yes_no(res->migration_pending));
printk(" inflight locks: %d, asts reserved: %d\n",
res->inflight_locks, atomic_read(&res->asts_reserved));
dlm_print_lockres_refmap(res);
@@ -164,59 +165,6 @@ static const char *dlm_errnames[] = {
[DLM_MAXSTATS] = "DLM_MAXSTATS",
};
-static const char *dlm_errmsgs[] = {
- [DLM_NORMAL] = "request in progress",
- [DLM_GRANTED] = "request granted",
- [DLM_DENIED] = "request denied",
- [DLM_DENIED_NOLOCKS] = "request denied, out of system resources",
- [DLM_WORKING] = "async request in progress",
- [DLM_BLOCKED] = "lock request blocked",
- [DLM_BLOCKED_ORPHAN] = "lock request blocked by a orphan lock",
- [DLM_DENIED_GRACE_PERIOD] = "topological change in progress",
- [DLM_SYSERR] = "system error",
- [DLM_NOSUPPORT] = "unsupported",
- [DLM_CANCELGRANT] = "can't cancel convert: already granted",
- [DLM_IVLOCKID] = "bad lockid",
- [DLM_SYNC] = "synchronous request granted",
- [DLM_BADTYPE] = "bad resource type",
- [DLM_BADRESOURCE] = "bad resource handle",
- [DLM_MAXHANDLES] = "no more resource handles",
- [DLM_NOCLINFO] = "can't contact cluster manager",
- [DLM_NOLOCKMGR] = "can't contact lock manager",
- [DLM_NOPURGED] = "can't contact purge daemon",
- [DLM_BADARGS] = "bad api args",
- [DLM_VOID] = "no status",
- [DLM_NOTQUEUED] = "NOQUEUE was specified and request failed",
- [DLM_IVBUFLEN] = "invalid resource name length",
- [DLM_CVTUNGRANT] = "attempted to convert ungranted lock",
- [DLM_BADPARAM] = "invalid lock mode specified",
- [DLM_VALNOTVALID] = "value block has been invalidated",
- [DLM_REJECTED] = "request rejected, unrecognized client",
- [DLM_ABORT] = "blocked lock request cancelled",
- [DLM_CANCEL] = "conversion request cancelled",
- [DLM_IVRESHANDLE] = "invalid resource handle",
- [DLM_DEADLOCK] = "deadlock recovery refused this request",
- [DLM_DENIED_NOASTS] = "failed to allocate AST",
- [DLM_FORWARD] = "request must wait for primary's response",
- [DLM_TIMEOUT] = "timeout value for lock has expired",
- [DLM_IVGROUPID] = "invalid group specification",
- [DLM_VERS_CONFLICT] = "version conflicts prevent request handling",
- [DLM_BAD_DEVICE_PATH] = "Locks device does not exist or path wrong",
- [DLM_NO_DEVICE_PERMISSION] = "Client has insufficient perms for device",
- [DLM_NO_CONTROL_DEVICE] = "Cannot set options on opened device ",
- [DLM_RECOVERING] = "lock resource being recovered",
- [DLM_MIGRATING] = "lock resource being migrated",
- [DLM_MAXSTATS] = "invalid error number",
-};
-
-const char *dlm_errmsg(enum dlm_status err)
-{
- if (err >= DLM_MAXSTATS || err < 0)
- return dlm_errmsgs[DLM_MAXSTATS];
- return dlm_errmsgs[err];
-}
-EXPORT_SYMBOL_GPL(dlm_errmsg);
-
const char *dlm_errname(enum dlm_status err)
{
if (err >= DLM_MAXSTATS || err < 0)
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 5c04dde99981..2347a50f079b 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1274,7 +1274,7 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
{
struct dlm_query_nodeinfo *qn;
struct dlm_ctxt *dlm = NULL;
- int locked = 0, status = -EINVAL;
+ int status = -EINVAL;
qn = (struct dlm_query_nodeinfo *) msg->buf;
@@ -1290,12 +1290,11 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
}
spin_lock(&dlm->spinlock);
- locked = 1;
if (dlm->joining_node != qn->qn_nodenum) {
mlog(ML_ERROR, "Node %d queried nodes on domain %s but "
"joining node is %d\n", qn->qn_nodenum, qn->qn_domain,
dlm->joining_node);
- goto bail;
+ goto unlock;
}
/* Support for node query was added in 1.1 */
@@ -1305,14 +1304,14 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
"but active dlm protocol is %d.%d\n", qn->qn_nodenum,
qn->qn_domain, dlm->dlm_locking_proto.pv_major,
dlm->dlm_locking_proto.pv_minor);
- goto bail;
+ goto unlock;
}
status = dlm_match_nodes(dlm, qn);
+unlock:
+ spin_unlock(&dlm->spinlock);
bail:
- if (locked)
- spin_unlock(&dlm->spinlock);
spin_unlock(&dlm_domain_lock);
return status;
@@ -1528,7 +1527,6 @@ static void dlm_send_join_asserts(struct dlm_ctxt *dlm,
{
int status, node, live;
- status = 0;
node = -1;
while ((node = find_next_bit(node_map, O2NM_MAX_NODES,
node + 1)) < O2NM_MAX_NODES) {
@@ -1878,7 +1876,8 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
dlm_debug_init(dlm);
snprintf(wq_name, O2NM_MAX_NAME_LEN, "dlm_wq-%s", dlm->name);
- dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM, 0);
+ dlm->dlm_worker = alloc_workqueue(wq_name, WQ_MEM_RECLAIM | WQ_PERCPU,
+ 0);
if (!dlm->dlm_worker) {
status = -ENOMEM;
mlog_errno(status);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index d610da8e2f24..4145e06d2c08 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -21,7 +21,7 @@
#include <linux/inet.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
-
+#include <linux/string_choices.h>
#include "../cluster/heartbeat.h"
#include "../cluster/nodemanager.h"
@@ -1477,7 +1477,6 @@ way_up_top:
goto send_response;
} else if (res->owner != DLM_LOCK_RES_OWNER_UNKNOWN) {
spin_unlock(&res->spinlock);
- // mlog(0, "node %u is the master\n", res->owner);
response = DLM_MASTER_RESP_NO;
if (mle)
kmem_cache_free(dlm_mle_cache, mle);
@@ -1493,7 +1492,6 @@ way_up_top:
BUG();
}
- // mlog(0, "lockres is in progress...\n");
spin_lock(&dlm->master_lock);
found = dlm_find_mle(dlm, &tmpmle, name, namelen);
if (!found) {
@@ -1503,8 +1501,6 @@ way_up_top:
set_maybe = 1;
spin_lock(&tmpmle->spinlock);
if (tmpmle->type == DLM_MLE_BLOCK) {
- // mlog(0, "this node is waiting for "
- // "lockres to be mastered\n");
response = DLM_MASTER_RESP_NO;
} else if (tmpmle->type == DLM_MLE_MIGRATION) {
mlog(0, "node %u is master, but trying to migrate to "
@@ -1531,8 +1527,6 @@ way_up_top:
} else
response = DLM_MASTER_RESP_NO;
} else {
- // mlog(0, "this node is attempting to "
- // "master lockres\n");
response = DLM_MASTER_RESP_MAYBE;
}
if (set_maybe)
@@ -1559,7 +1553,6 @@ way_up_top:
found = dlm_find_mle(dlm, &tmpmle, name, namelen);
if (!found) {
/* this lockid has never been seen on this node yet */
- // mlog(0, "no mle found\n");
if (!mle) {
spin_unlock(&dlm->master_lock);
spin_unlock(&dlm->spinlock);
@@ -1573,8 +1566,6 @@ way_up_top:
goto way_up_top;
}
- // mlog(0, "this is second time thru, already allocated, "
- // "add the block.\n");
dlm_init_mle(mle, DLM_MLE_BLOCK, dlm, NULL, name, namelen);
set_bit(request->node_idx, mle->maybe_map);
__dlm_insert_mle(dlm, mle);
@@ -1897,8 +1888,6 @@ ok:
spin_unlock(&res->spinlock);
}
- // mlog(0, "woo! got an assert_master from node %u!\n",
- // assert->node_idx);
if (mle) {
int extra_ref = 0;
int nn = -1;
@@ -2859,7 +2848,7 @@ static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
dlm_lockres_release_ast(dlm, res);
mlog(0, "about to wait on migration_wq, dirty=%s\n",
- res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no");
+ str_yes_no(res->state & DLM_LOCK_RES_DIRTY));
/* if the extra ref we just put was the final one, this
* will pass thru immediately. otherwise, we need to wait
* for the last ast to finish. */
@@ -2869,12 +2858,12 @@ again:
msecs_to_jiffies(1000));
if (ret < 0) {
mlog(0, "woken again: migrating? %s, dead? %s\n",
- res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no",
- test_bit(target, dlm->domain_map) ? "no":"yes");
+ str_yes_no(res->state & DLM_LOCK_RES_MIGRATING),
+ str_no_yes(test_bit(target, dlm->domain_map)));
} else {
mlog(0, "all is well: migrating? %s, dead? %s\n",
- res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no",
- test_bit(target, dlm->domain_map) ? "no":"yes");
+ str_yes_no(res->state & DLM_LOCK_RES_MIGRATING),
+ str_no_yes(test_bit(target, dlm->domain_map)));
}
if (!dlm_migration_can_proceed(dlm, res, target)) {
mlog(0, "trying again...\n");
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 50da8af988c1..843ee02bd85f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -22,7 +22,7 @@
#include <linux/timer.h>
#include <linux/kthread.h>
#include <linux/delay.h>
-
+#include <linux/string_choices.h>
#include "../cluster/heartbeat.h"
#include "../cluster/nodemanager.h"
@@ -207,7 +207,7 @@ void dlm_complete_recovery_thread(struct dlm_ctxt *dlm)
* 1) all recovery threads cluster wide will work on recovering
* ONE node at a time
* 2) negotiate who will take over all the locks for the dead node.
- * thats right... ALL the locks.
+ * that's right... ALL the locks.
* 3) once a new master is chosen, everyone scans all locks
* and moves aside those mastered by the dead guy
* 4) each of these locks should be locked until recovery is done
@@ -464,7 +464,6 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm)
}
if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
- // mlog(0, "nothing to recover! sleeping now!\n");
spin_unlock(&dlm->spinlock);
/* return to main thread loop and sleep. */
return 0;
@@ -581,8 +580,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
msecs_to_jiffies(1000));
mlog(0, "waited 1 sec for %u, "
"dead? %s\n", ndata->node_num,
- dlm_is_node_dead(dlm, ndata->node_num) ?
- "yes" : "no");
+ str_yes_no(dlm_is_node_dead(dlm, ndata->node_num)));
} else {
/* -ENOMEM on the other node */
mlog(0, "%s: node %u returned "
@@ -677,7 +675,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
spin_unlock(&dlm_reco_state_lock);
mlog(0, "pass #%d, all_nodes_done?: %s\n", ++pass,
- all_nodes_done?"yes":"no");
+ str_yes_no(all_nodes_done));
if (all_nodes_done) {
int ret;
@@ -1469,7 +1467,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
* The first one is handled at the end of this function. The
* other two are handled in the worker thread after locks have
* been attached. Yes, we don't wait for purge time to match
- * kref_init. The lockres will still have atleast one ref
+ * kref_init. The lockres will still have at least one ref
* added because it is in the hash __dlm_insert_lockres() */
extra_refs++;
@@ -1735,7 +1733,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
spin_unlock(&res->spinlock);
}
} else {
- /* put.. incase we are not the master */
+ /* put.. in case we are not the master */
spin_unlock(&res->spinlock);
dlm_lockres_put(res);
}
@@ -2633,7 +2631,7 @@ again:
dlm_reco_master_ready(dlm),
msecs_to_jiffies(1000));
if (!dlm_reco_master_ready(dlm)) {
- mlog(0, "%s: reco master taking awhile\n",
+ mlog(0, "%s: reco master taking a while\n",
dlm->name);
goto again;
}
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 81265123ce6c..339f0b11cdc8 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -20,6 +20,7 @@
#include <linux/module.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
#include <linux/pagemap.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -80,8 +81,7 @@ static int param_set_dlmfs_capabilities(const char *val,
static int param_get_dlmfs_capabilities(char *buffer,
const struct kernel_param *kp)
{
- return strlcpy(buffer, DLMFS_CAPABILITIES,
- strlen(DLMFS_CAPABILITIES) + 1);
+ return sysfs_emit(buffer, DLMFS_CAPABILITIES);
}
module_param_call(capabilities, param_set_dlmfs_capabilities,
param_get_dlmfs_capabilities, NULL, 0444);
@@ -337,7 +337,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
if (inode) {
inode->i_ino = get_next_ino();
inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+ simple_inode_init_ts(inode);
inc_nlink(inode);
inode->i_fop = &simple_dir_operations;
@@ -360,7 +360,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
inode->i_ino = get_next_ino();
inode_init_owner(&nop_mnt_idmap, inode, parent, mode);
- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+ simple_inode_init_ts(inode);
ip = DLMFS_I(inode);
ip->ip_conn = DLMFS_I(parent)->ip_conn;
@@ -402,10 +402,10 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
* File creation. Allocate an inode, and we're done..
*/
/* SMP-safe */
-static int dlmfs_mkdir(struct mnt_idmap * idmap,
- struct inode * dir,
- struct dentry * dentry,
- umode_t mode)
+static struct dentry *dlmfs_mkdir(struct mnt_idmap * idmap,
+ struct inode * dir,
+ struct dentry * dentry,
+ umode_t mode)
{
int status;
struct inode *inode = NULL;
@@ -441,14 +441,13 @@ static int dlmfs_mkdir(struct mnt_idmap * idmap,
ip->ip_conn = conn;
inc_nlink(dir);
- d_instantiate(dentry, inode);
- dget(dentry); /* Extra count - pin the dentry in core */
+ d_make_persistent(dentry, inode);
status = 0;
bail:
if (status < 0)
iput(inode);
- return status;
+ return ERR_PTR(status);
}
static int dlmfs_create(struct mnt_idmap *idmap,
@@ -480,8 +479,7 @@ static int dlmfs_create(struct mnt_idmap *idmap,
goto bail;
}
- d_instantiate(dentry, inode);
- dget(dentry); /* Extra count - pin the dentry in core */
+ d_make_persistent(dentry, inode);
bail:
return status;
}
@@ -507,9 +505,7 @@ bail:
return status;
}
-static int dlmfs_fill_super(struct super_block * sb,
- void * data,
- int silent)
+static int dlmfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
@@ -549,7 +545,7 @@ static const struct super_operations dlmfs_ops = {
.alloc_inode = dlmfs_alloc_inode,
.free_inode = dlmfs_free_inode,
.evict_inode = dlmfs_evict_inode,
- .drop_inode = generic_delete_inode,
+ .drop_inode = inode_just_drop,
};
static const struct inode_operations dlmfs_file_inode_operations = {
@@ -557,17 +553,27 @@ static const struct inode_operations dlmfs_file_inode_operations = {
.setattr = dlmfs_file_setattr,
};
-static struct dentry *dlmfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int dlmfs_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, dlmfs_fill_super);
+}
+
+static const struct fs_context_operations dlmfs_context_ops = {
+ .get_tree = dlmfs_get_tree,
+};
+
+static int dlmfs_init_fs_context(struct fs_context *fc)
{
- return mount_nodev(fs_type, flags, data, dlmfs_fill_super);
+ fc->ops = &dlmfs_context_ops;
+
+ return 0;
}
static struct file_system_type dlmfs_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2_dlmfs",
- .mount = dlmfs_mount,
- .kill_sb = kill_litter_super,
+ .kill_sb = kill_anon_super,
+ .init_fs_context = dlmfs_init_fs_context,
};
MODULE_ALIAS_FS("ocfs2_dlmfs");
@@ -579,7 +585,7 @@ static int __init init_dlmfs_fs(void)
dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache",
sizeof(struct dlmfs_inode_private),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
dlmfs_init_once);
if (!dlmfs_inode_cache) {
status = -ENOMEM;
@@ -587,7 +593,8 @@ static int __init init_dlmfs_fs(void)
}
cleanup_inode = 1;
- user_dlm_worker = alloc_workqueue("user_dlm", WQ_MEM_RECLAIM, 0);
+ user_dlm_worker = alloc_workqueue("user_dlm",
+ WQ_MEM_RECLAIM | WQ_PERCPU, 0);
if (!user_dlm_worker) {
status = -ENOMEM;
goto bail;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index c3e2961ee5db..619ff03b15d6 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -19,6 +19,7 @@
#include <linux/delay.h>
#include <linux/quotaops.h>
#include <linux/sched/signal.h>
+#include <linux/string_choices.h>
#define MLOG_MASK_PREFIX ML_DLM_GLUE
#include <cluster/masklog.h>
@@ -221,12 +222,12 @@ struct ocfs2_lock_res_ops {
*/
#define LOCK_TYPE_USES_LVB 0x2
-static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
.get_osb = ocfs2_get_inode_osb,
.check_downconvert = ocfs2_check_meta_downconvert,
.set_lvb = ocfs2_set_meta_lvb,
@@ -234,50 +235,50 @@ static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_super_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_super_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH,
};
-static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_rename_lops = {
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.get_osb = ocfs2_get_dentry_osb,
.post_unlock = ocfs2_dentry_post_unlock,
.downconvert_worker = ocfs2_dentry_convert_worker,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_flock_lops = {
.get_osb = ocfs2_get_file_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
.set_lvb = ocfs2_set_qinfo_lvb,
.get_osb = ocfs2_get_qinfo_osb,
.flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
.check_downconvert = ocfs2_check_refcount_downconvert,
.downconvert_worker = ocfs2_refcount_convert_worker,
.flags = 0,
@@ -510,7 +511,7 @@ static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
- struct ocfs2_lock_res_ops *ops,
+ const struct ocfs2_lock_res_ops *ops,
void *priv)
{
res->l_type = type;
@@ -553,7 +554,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
unsigned int generation,
struct inode *inode)
{
- struct ocfs2_lock_res_ops *ops;
+ const struct ocfs2_lock_res_ops *ops;
switch(type) {
case OCFS2_LOCK_TYPE_RW:
@@ -794,7 +795,7 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
/*
* Keep a list of processes who have interest in a lockres.
- * Note: this is now only uesed for check recursive cluster locking.
+ * Note: this is now only used for check recursive cluster locking.
*/
static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
struct ocfs2_lock_holder *oh)
@@ -1615,7 +1616,7 @@ update_holders:
unlock:
lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
- /* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
+ /* ocfs2_unblock_lock request on seeing OCFS2_LOCK_UPCONVERT_FINISHING */
kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED);
spin_unlock_irqrestore(&lockres->l_lock, flags);
@@ -2162,7 +2163,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
struct ocfs2_meta_lvb *lvb;
- struct timespec64 ctime = inode_get_ctime(inode);
+ struct timespec64 ts;
lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
@@ -2183,12 +2184,12 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
lvb->lvb_igid = cpu_to_be32(i_gid_read(inode));
lvb->lvb_imode = cpu_to_be16(inode->i_mode);
lvb->lvb_inlink = cpu_to_be16(inode->i_nlink);
- lvb->lvb_iatime_packed =
- cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
- lvb->lvb_ictime_packed =
- cpu_to_be64(ocfs2_pack_timespec(&ctime));
- lvb->lvb_imtime_packed =
- cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
+ ts = inode_get_atime(inode);
+ lvb->lvb_iatime_packed = cpu_to_be64(ocfs2_pack_timespec(&ts));
+ ts = inode_get_ctime(inode);
+ lvb->lvb_ictime_packed = cpu_to_be64(ocfs2_pack_timespec(&ts));
+ ts = inode_get_mtime(inode);
+ lvb->lvb_imtime_packed = cpu_to_be64(ocfs2_pack_timespec(&ts));
lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
@@ -2209,7 +2210,7 @@ static int ocfs2_refresh_inode_from_lvb(struct inode *inode)
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
struct ocfs2_meta_lvb *lvb;
- struct timespec64 ctime;
+ struct timespec64 ts;
mlog_meta_lvb(0, lockres);
@@ -2236,13 +2237,12 @@ static int ocfs2_refresh_inode_from_lvb(struct inode *inode)
i_gid_write(inode, be32_to_cpu(lvb->lvb_igid));
inode->i_mode = be16_to_cpu(lvb->lvb_imode);
set_nlink(inode, be16_to_cpu(lvb->lvb_inlink));
- ocfs2_unpack_timespec(&inode->i_atime,
- be64_to_cpu(lvb->lvb_iatime_packed));
- ocfs2_unpack_timespec(&inode->i_mtime,
- be64_to_cpu(lvb->lvb_imtime_packed));
- ocfs2_unpack_timespec(&ctime,
- be64_to_cpu(lvb->lvb_ictime_packed));
- inode_set_ctime_to_ts(inode, ctime);
+ ocfs2_unpack_timespec(&ts, be64_to_cpu(lvb->lvb_iatime_packed));
+ inode_set_atime_to_ts(inode, ts);
+ ocfs2_unpack_timespec(&ts, be64_to_cpu(lvb->lvb_imtime_packed));
+ inode_set_mtime_to_ts(inode, ts);
+ ocfs2_unpack_timespec(&ts, be64_to_cpu(lvb->lvb_ictime_packed));
+ inode_set_ctime_to_ts(inode, ts);
spin_unlock(&oi->ip_lock);
return 0;
}
@@ -2487,7 +2487,7 @@ update:
* which hasn't been populated yet, so clear the refresh flag
* and let the caller handle it.
*/
- if (inode->i_state & I_NEW) {
+ if (inode_state_read_once(inode) & I_NEW) {
status = 0;
if (lockres)
ocfs2_complete_lock_res_refresh(lockres, 0);
@@ -2530,30 +2530,28 @@ bail:
/*
* This is working around a lock inversion between tasks acquiring DLM
- * locks while holding a page lock and the downconvert thread which
- * blocks dlm lock acquiry while acquiring page locks.
+ * locks while holding a folio lock and the downconvert thread which
+ * blocks dlm lock acquiry while acquiring folio locks.
*
- * ** These _with_page variantes are only intended to be called from aop
- * methods that hold page locks and return a very specific *positive* error
+ * ** These _with_folio variants are only intended to be called from aop
+ * methods that hold folio locks and return a very specific *positive* error
* code that aop methods pass up to the VFS -- test for errors with != 0. **
*
* The DLM is called such that it returns -EAGAIN if it would have
* blocked waiting for the downconvert thread. In that case we unlock
- * our page so the downconvert thread can make progress. Once we've
+ * our folio so the downconvert thread can make progress. Once we've
* done this we have to return AOP_TRUNCATED_PAGE so the aop method
* that called us can bubble that back up into the VFS who will then
* immediately retry the aop call.
*/
-int ocfs2_inode_lock_with_page(struct inode *inode,
- struct buffer_head **ret_bh,
- int ex,
- struct page *page)
+int ocfs2_inode_lock_with_folio(struct inode *inode,
+ struct buffer_head **ret_bh, int ex, struct folio *folio)
{
int ret;
ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
if (ret == -EAGAIN) {
- unlock_page(page);
+ folio_unlock(folio);
/*
* If we can't get inode lock immediately, we should not return
* directly here, since this will lead to a softlockup problem.
@@ -2631,7 +2629,7 @@ void ocfs2_inode_unlock(struct inode *inode,
}
/*
- * This _tracker variantes are introduced to deal with the recursive cluster
+ * This _tracker variants are introduced to deal with the recursive cluster
* locking issue. The idea is to keep track of a lock holder on the stack of
* the current process. If there's a lock holder on the stack, we know the
* task context is already protected by cluster locking. Currently, they're
@@ -2736,7 +2734,7 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
struct ocfs2_lock_res *lockres;
lockres = &OCFS2_I(inode)->ip_inode_lockres;
- /* had_lock means that the currect process already takes the cluster
+ /* had_lock means that the current process already takes the cluster
* lock previously.
* If had_lock is 1, we have nothing to do here.
* If had_lock is 0, we will release the lock.
@@ -3111,6 +3109,7 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
struct ocfs2_lock_res *iter = v;
struct ocfs2_lock_res *dummy = &priv->p_iter_res;
+ (*pos)++;
spin_lock(&ocfs2_dlm_tracking_lock);
iter = ocfs2_dlm_next_res(iter, priv);
list_del_init(&dummy->l_debug_list);
@@ -3152,11 +3151,8 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
#ifdef CONFIG_OCFS2_FS_STATS
if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) {
now = ktime_to_us(ktime_get_real());
- if (lockres->l_lock_prmode.ls_last >
- lockres->l_lock_exmode.ls_last)
- last = lockres->l_lock_prmode.ls_last;
- else
- last = lockres->l_lock_exmode.ls_last;
+ last = max(lockres->l_lock_prmode.ls_last,
+ lockres->l_lock_exmode.ls_last);
/*
* Use d_filter_secs field to filter lock resources dump,
* the default d_filter_secs(0) value filters nothing,
@@ -3805,9 +3801,9 @@ recheck:
* set when the ast is received for an upconvert just before the
* OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
* on the heels of the ast, we want to delay the downconvert just
- * enough to allow the up requestor to do its task. Because this
+ * enough to allow the up requester to do its task. Because this
* lock is in the blocked queue, the lock will be downconverted
- * as soon as the requestor is done with the lock.
+ * as soon as the requester is done with the lock.
*/
if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
goto leave_requeue;
@@ -4342,7 +4338,7 @@ unqueue:
ocfs2_schedule_blocked_lock(osb, lockres);
mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
- ctl.requeue ? "yes" : "no");
+ str_yes_no(ctl.requeue));
spin_unlock_irqrestore(&lockres->l_lock, flags);
if (ctl.unblock_action != UNBLOCK_CONTINUE
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index e5da5809ed95..a3ebd7303ea2 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -137,10 +137,8 @@ int ocfs2_inode_lock_full_nested(struct inode *inode,
int ex,
int arg_flags,
int subclass);
-int ocfs2_inode_lock_with_page(struct inode *inode,
- struct buffer_head **ret_bh,
- int ex,
- struct page *page);
+int ocfs2_inode_lock_with_folio(struct inode *inode,
+ struct buffer_head **ret_bh, int ex, struct folio *folio);
/* Variants without special locking class or flags */
#define ocfs2_inode_lock_full(i, r, e, f)\
ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL)
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index eaa8c80ace3c..b95724b767e1 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -255,9 +255,9 @@ static struct dentry *ocfs2_fh_to_dentry(struct super_block *sb,
if (fh_len < 3 || fh_type > 2)
return NULL;
- handle.ih_blkno = (u64)le32_to_cpu(fid->raw[0]) << 32;
- handle.ih_blkno |= (u64)le32_to_cpu(fid->raw[1]);
- handle.ih_generation = le32_to_cpu(fid->raw[2]);
+ handle.ih_blkno = (u64)le32_to_cpu((__force __le32)fid->raw[0]) << 32;
+ handle.ih_blkno |= (u64)le32_to_cpu((__force __le32)fid->raw[1]);
+ handle.ih_generation = le32_to_cpu((__force __le32)fid->raw[2]);
return ocfs2_get_dentry(sb, &handle);
}
@@ -269,9 +269,9 @@ static struct dentry *ocfs2_fh_to_parent(struct super_block *sb,
if (fh_type != 2 || fh_len < 6)
return NULL;
- parent.ih_blkno = (u64)le32_to_cpu(fid->raw[3]) << 32;
- parent.ih_blkno |= (u64)le32_to_cpu(fid->raw[4]);
- parent.ih_generation = le32_to_cpu(fid->raw[5]);
+ parent.ih_blkno = (u64)le32_to_cpu((__force __le32)fid->raw[3]) << 32;
+ parent.ih_blkno |= (u64)le32_to_cpu((__force __le32)fid->raw[4]);
+ parent.ih_generation = le32_to_cpu((__force __le32)fid->raw[5]);
return ocfs2_get_dentry(sb, &parent);
}
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 70a768b623cf..ef147e8b3271 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -435,6 +435,16 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
}
}
+ if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) {
+ ocfs2_error(inode->i_sb,
+ "Inode %lu has an invalid extent (next_free_rec %u, count %u)\n",
+ inode->i_ino,
+ le16_to_cpu(el->l_next_free_rec),
+ le16_to_cpu(el->l_count));
+ ret = -EROFS;
+ goto out;
+ }
+
i = ocfs2_search_extent_list(el, v_cluster);
if (i == -1) {
/*
@@ -696,6 +706,8 @@ out:
* it not only handles the fiemap for inlined files, but also deals
* with the fast symlink, cause they have no difference for extent
* mapping per se.
+ *
+ * Must be called with ip_alloc_sem semaphore held.
*/
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
struct fiemap_extent_info *fieinfo,
@@ -707,6 +719,7 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
u64 phys;
u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ lockdep_assert_held_read(&oi->ip_alloc_sem);
di = (struct ocfs2_dinode *)di_bh->b_data;
if (ocfs2_inode_is_fast_symlink(inode))
@@ -722,8 +735,11 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
phys += offsetof(struct ocfs2_dinode,
id2.i_data.id_data);
+ /* Release the ip_alloc_sem to prevent deadlock on page fault */
+ up_read(&OCFS2_I(inode)->ip_alloc_sem);
ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
flags);
+ down_read(&OCFS2_I(inode)->ip_alloc_sem);
if (ret < 0)
return ret;
}
@@ -792,9 +808,11 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits;
virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits;
-
+ /* Release the ip_alloc_sem to prevent deadlock on page fault */
+ up_read(&OCFS2_I(inode)->ip_alloc_sem);
ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes,
len_bytes, fe_flags);
+ down_read(&OCFS2_I(inode)->ip_alloc_sem);
if (ret)
break;
@@ -973,7 +991,13 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
}
while (done < nr) {
- down_read(&OCFS2_I(inode)->ip_alloc_sem);
+ if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
+ rc = -EAGAIN;
+ mlog(ML_ERROR,
+ "Inode #%llu ip_alloc_sem is temporarily unavailable\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ break;
+ }
rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
&p_block, &p_count, NULL);
up_read(&OCFS2_I(inode)->ip_alloc_sem);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c45596c25c66..21d797ccccd0 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -233,16 +233,18 @@ int ocfs2_should_update_atime(struct inode *inode,
if (vfsmnt->mnt_flags & MNT_RELATIME) {
struct timespec64 ctime = inode_get_ctime(inode);
+ struct timespec64 atime = inode_get_atime(inode);
+ struct timespec64 mtime = inode_get_mtime(inode);
- if ((timespec64_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
- (timespec64_compare(&inode->i_atime, &ctime) <= 0))
+ if ((timespec64_compare(&atime, &mtime) <= 0) ||
+ (timespec64_compare(&atime, &ctime) <= 0))
return 1;
return 0;
}
now = current_time(inode);
- if ((now.tv_sec - inode->i_atime.tv_sec <= osb->s_atime_quantum))
+ if ((now.tv_sec - inode_get_atime_sec(inode) <= osb->s_atime_quantum))
return 0;
else
return 1;
@@ -275,9 +277,9 @@ int ocfs2_update_inode_atime(struct inode *inode,
* have i_rwsem to guard against concurrent changes to other
* inode fields.
*/
- inode->i_atime = current_time(inode);
- di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
- di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
+ inode_set_atime_to_ts(inode, current_time(inode));
+ di->i_atime = cpu_to_le64(inode_get_atime_sec(inode));
+ di->i_atime_nsec = cpu_to_le32(inode_get_atime_nsec(inode));
ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, bh);
@@ -296,7 +298,7 @@ int ocfs2_set_inode_size(handle_t *handle,
i_size_write(inode, new_i_size);
inode->i_blocks = ocfs2_inode_sector_count(inode);
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
if (status < 0) {
@@ -417,12 +419,12 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
}
i_size_write(inode, new_i_size);
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
di = (struct ocfs2_dinode *) fe_bh->b_data;
di->i_size = cpu_to_le64(new_i_size);
- di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
- di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
+ di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime_sec(inode));
+ di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, fe_bh);
@@ -753,7 +755,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
u64 abs_to, struct buffer_head *di_bh)
{
struct address_space *mapping = inode->i_mapping;
- struct page *page;
+ struct folio *folio;
unsigned long index = abs_from >> PAGE_SHIFT;
handle_t *handle;
int ret = 0;
@@ -772,18 +774,19 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
goto out;
}
- page = find_or_create_page(mapping, index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
mlog_errno(ret);
goto out_commit_trans;
}
- /* Get the offsets within the page that we want to zero */
- zero_from = abs_from & (PAGE_SIZE - 1);
- zero_to = abs_to & (PAGE_SIZE - 1);
+ /* Get the offsets within the folio that we want to zero */
+ zero_from = offset_in_folio(folio, abs_from);
+ zero_to = offset_in_folio(folio, abs_to);
if (!zero_to)
- zero_to = PAGE_SIZE;
+ zero_to = folio_size(folio);
trace_ocfs2_write_zero_page(
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -801,7 +804,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
* __block_write_begin and block_commit_write to zero the
* whole block.
*/
- ret = __block_write_begin(page, block_start + 1, 0,
+ ret = __block_write_begin(folio, block_start + 1, 0,
ocfs2_get_block);
if (ret < 0) {
mlog_errno(ret);
@@ -810,20 +813,20 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
/* must not update i_size! */
- block_commit_write(page, block_start + 1, block_start + 1);
+ block_commit_write(folio, block_start + 1, block_start + 1);
}
/*
* fs-writeback will release the dirty pages without page lock
* whose offset are over inode size, the release happens at
- * block_write_full_page().
+ * block_write_full_folio().
*/
i_size_write(inode, abs_to);
inode->i_blocks = ocfs2_inode_sector_count(inode);
di->i_size = cpu_to_le64((u64)i_size_read(inode));
- inode->i_mtime = inode_set_ctime_current(inode);
- di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+ di->i_mtime = di->i_ctime = cpu_to_le64(inode_get_mtime_sec(inode));
+ di->i_ctime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
di->i_mtime_nsec = di->i_ctime_nsec;
if (handle) {
ocfs2_journal_dirty(handle, di_bh);
@@ -831,8 +834,8 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
}
out_unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
out_commit_trans:
if (handle)
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
@@ -1126,9 +1129,12 @@ int ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
trace_ocfs2_setattr(inode, dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
dentry->d_name.len, dentry->d_name.name,
- attr->ia_valid, attr->ia_mode,
- from_kuid(&init_user_ns, attr->ia_uid),
- from_kgid(&init_user_ns, attr->ia_gid));
+ attr->ia_valid,
+ attr->ia_valid & ATTR_MODE ? attr->ia_mode : 0,
+ attr->ia_valid & ATTR_UID ?
+ from_kuid(&init_user_ns, attr->ia_uid) : 0,
+ attr->ia_valid & ATTR_GID ?
+ from_kgid(&init_user_ns, attr->ia_gid) : 0);
/* ensuring we don't even attempt to truncate a symlink */
if (S_ISLNK(inode->i_mode))
@@ -1781,6 +1787,14 @@ int ocfs2_remove_inode_range(struct inode *inode,
return 0;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di);
+
+ if (byte_start > id_count || byte_start + byte_len > id_count) {
+ ret = -EINVAL;
+ mlog_errno(ret);
+ goto out;
+ }
+
ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
byte_start + byte_len, 0);
if (ret) {
@@ -1934,6 +1948,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
inode_lock(inode);
+ /* Wait all existing dio workers, newcomers will block on i_rwsem */
+ inode_dio_wait(inode);
/*
* This prevents concurrent writes on other nodes
*/
@@ -2040,7 +2056,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
goto out_inode_unlock;
}
- inode->i_mtime = inode_set_ctime_current(inode);
+ inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
if (ret < 0)
mlog_errno(ret);
@@ -2382,6 +2398,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
} else
inode_lock(inode);
+ ocfs2_iocb_init_rw_locked(iocb);
+
/*
* Concurrent O_DIRECT writes are allowed with
* mount_option "coherency=buffered".
@@ -2528,6 +2546,8 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
if (!direct_io && nowait)
return -EOPNOTSUPP;
+ ocfs2_iocb_init_rw_locked(iocb);
+
/*
* buffered reads protect themselves in ->read_folio(). O_DIRECT reads
* need locks to protect pending reads from racing with truncate.
@@ -2746,6 +2766,13 @@ out_unlock:
return remapped > 0 ? remapped : ret;
}
+static loff_t ocfs2_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct ocfs2_file_private *fp = file->private_data;
+
+ return generic_llseek_cookie(file, offset, whence, &fp->cookie);
+}
+
const struct inode_operations ocfs2_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
@@ -2761,6 +2788,7 @@ const struct inode_operations ocfs2_file_iops = {
const struct inode_operations ocfs2_special_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
+ .listxattr = ocfs2_listxattr,
.permission = ocfs2_permission,
.get_inode_acl = ocfs2_iop_get_acl,
.set_acl = ocfs2_iop_set_acl,
@@ -2772,7 +2800,7 @@ const struct inode_operations ocfs2_special_file_iops = {
*/
const struct file_operations ocfs2_fops = {
.llseek = ocfs2_file_llseek,
- .mmap = ocfs2_mmap,
+ .mmap_prepare = ocfs2_mmap_prepare,
.fsync = ocfs2_sync_file,
.release = ocfs2_file_release,
.open = ocfs2_file_open,
@@ -2788,11 +2816,12 @@ const struct file_operations ocfs2_fops = {
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
.remap_file_range = ocfs2_remap_file_range,
+ .fop_flags = FOP_ASYNC_LOCK,
};
WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
const struct file_operations ocfs2_dops = {
- .llseek = generic_file_llseek,
+ .llseek = ocfs2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
@@ -2804,6 +2833,7 @@ const struct file_operations ocfs2_dops = {
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
+ .fop_flags = FOP_ASYNC_LOCK,
};
/*
@@ -2820,7 +2850,7 @@ const struct file_operations ocfs2_dops = {
*/
const struct file_operations ocfs2_fops_no_plocks = {
.llseek = ocfs2_file_llseek,
- .mmap = ocfs2_mmap,
+ .mmap_prepare = ocfs2_mmap_prepare,
.fsync = ocfs2_sync_file,
.release = ocfs2_file_release,
.open = ocfs2_file_open,
@@ -2838,7 +2868,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
};
const struct file_operations ocfs2_dops_no_plocks = {
- .llseek = generic_file_llseek,
+ .llseek = ocfs2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 8e53e4ac1120..41e65e45a9f3 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -20,6 +20,7 @@ struct ocfs2_alloc_context;
enum ocfs2_alloc_restarted;
struct ocfs2_file_private {
+ u64 cookie;
struct file *fp_file;
struct mutex fp_mutex;
struct ocfs2_lock_res fp_flock;
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 1ad7106741f8..3ad7baf67658 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -505,5 +505,5 @@ static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
ocfs2_filecheck_handle_entry(ent, entry);
exit:
- return (!ret ? count : ret);
+ return ret ?: count;
}
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index e8771600b930..8340525e5589 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -50,8 +50,6 @@ struct ocfs2_find_inode_args
unsigned int fi_sysfile_type;
};
-static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES];
-
static int ocfs2_read_locked_inode(struct inode *inode,
struct ocfs2_find_inode_args *args);
static int ocfs2_init_locked_inode(struct inode *inode, void *opaque);
@@ -154,8 +152,8 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
mlog_errno(PTR_ERR(inode));
goto bail;
}
- trace_ocfs2_iget5_locked(inode->i_state);
- if (inode->i_state & I_NEW) {
+ trace_ocfs2_iget5_locked(inode_state_read_once(inode));
+ if (inode_state_read_once(inode) & I_NEW) {
rc = ocfs2_read_locked_inode(inode, &args);
unlock_new_inode(inode);
}
@@ -200,6 +198,22 @@ bail:
return inode;
}
+static int ocfs2_dinode_has_extents(struct ocfs2_dinode *di)
+{
+ /* inodes flagged with other stuff in id2 */
+ if (le32_to_cpu(di->i_flags) &
+ (OCFS2_SUPER_BLOCK_FL | OCFS2_LOCAL_ALLOC_FL | OCFS2_CHAIN_FL |
+ OCFS2_DEALLOC_FL))
+ return 0;
+ /* i_flags doesn't indicate when id2 is a fast symlink */
+ if (S_ISLNK(le16_to_cpu(di->i_mode)) && le64_to_cpu(di->i_size) &&
+ !le32_to_cpu(di->i_clusters))
+ return 0;
+ if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)
+ return 0;
+
+ return 1;
+}
/*
* here's how inodes get read from disk:
@@ -236,14 +250,77 @@ bail:
static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
{
struct ocfs2_find_inode_args *args = opaque;
+#ifdef CONFIG_LOCKDEP
+ static struct lock_class_key ocfs2_sysfile_lock_key[NUM_SYSTEM_INODES];
static struct lock_class_key ocfs2_quota_ip_alloc_sem_key,
ocfs2_file_ip_alloc_sem_key;
+#endif
inode->i_ino = args->fi_ino;
OCFS2_I(inode)->ip_blkno = args->fi_blkno;
- if (args->fi_sysfile_type != 0)
+#ifdef CONFIG_LOCKDEP
+ switch (args->fi_sysfile_type) {
+ case BAD_BLOCK_SYSTEM_INODE:
+ break;
+ case GLOBAL_INODE_ALLOC_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[GLOBAL_INODE_ALLOC_SYSTEM_INODE]);
+ break;
+ case SLOT_MAP_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[SLOT_MAP_SYSTEM_INODE]);
+ break;
+ case HEARTBEAT_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[HEARTBEAT_SYSTEM_INODE]);
+ break;
+ case GLOBAL_BITMAP_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[GLOBAL_BITMAP_SYSTEM_INODE]);
+ break;
+ case USER_QUOTA_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[USER_QUOTA_SYSTEM_INODE]);
+ break;
+ case GROUP_QUOTA_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[GROUP_QUOTA_SYSTEM_INODE]);
+ break;
+ case ORPHAN_DIR_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[ORPHAN_DIR_SYSTEM_INODE]);
+ break;
+ case EXTENT_ALLOC_SYSTEM_INODE:
lockdep_set_class(&inode->i_rwsem,
- &ocfs2_sysfile_lock_key[args->fi_sysfile_type]);
+ &ocfs2_sysfile_lock_key[EXTENT_ALLOC_SYSTEM_INODE]);
+ break;
+ case INODE_ALLOC_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[INODE_ALLOC_SYSTEM_INODE]);
+ break;
+ case JOURNAL_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[JOURNAL_SYSTEM_INODE]);
+ break;
+ case LOCAL_ALLOC_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[LOCAL_ALLOC_SYSTEM_INODE]);
+ break;
+ case TRUNCATE_LOG_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[TRUNCATE_LOG_SYSTEM_INODE]);
+ break;
+ case LOCAL_USER_QUOTA_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[LOCAL_USER_QUOTA_SYSTEM_INODE]);
+ break;
+ case LOCAL_GROUP_QUOTA_SYSTEM_INODE:
+ lockdep_set_class(&inode->i_rwsem,
+ &ocfs2_sysfile_lock_key[LOCAL_GROUP_QUOTA_SYSTEM_INODE]);
+ break;
+ default:
+ WARN_ONCE(1, "Unknown sysfile type %d\n", args->fi_sysfile_type);
+ }
if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE ||
args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE ||
args->fi_sysfile_type == LOCAL_USER_QUOTA_SYSTEM_INODE ||
@@ -253,6 +330,7 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque)
else
lockdep_set_class(&OCFS2_I(inode)->ip_alloc_sem,
&ocfs2_file_ip_alloc_sem_key);
+#endif
return 0;
}
@@ -302,10 +380,10 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
inode->i_blocks = ocfs2_inode_sector_count(inode);
inode->i_mapping->a_ops = &ocfs2_aops;
}
- inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
- inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
- inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
- inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
+ inode_set_atime(inode, le64_to_cpu(fe->i_atime),
+ le32_to_cpu(fe->i_atime_nsec));
+ inode_set_mtime(inode, le64_to_cpu(fe->i_mtime),
+ le32_to_cpu(fe->i_mtime_nsec));
inode_set_ctime(inode, le64_to_cpu(fe->i_ctime),
le32_to_cpu(fe->i_ctime_nsec));
@@ -1122,7 +1200,7 @@ static void ocfs2_clear_inode(struct inode *inode)
dquot_drop(inode);
- /* To preven remote deletes we hold open lock before, now it
+ /* To prevent remote deletes we hold open lock before, now it
* is time to unlock PR and EX open locks. */
ocfs2_open_unlock(inode);
@@ -1205,12 +1283,17 @@ static void ocfs2_clear_inode(struct inode *inode)
* the journal is flushed before journal shutdown. Thus it is safe to
* have inodes get cleaned up after journal shutdown.
*/
+ if (!osb->journal)
+ return;
+
jbd2_journal_release_jbd_inode(osb->journal->j_journal,
&oi->ip_jinode);
}
void ocfs2_evict_inode(struct inode *inode)
{
+ write_inode_now(inode, 1);
+
if (!inode->i_nlink ||
(OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) {
ocfs2_delete_inode(inode);
@@ -1220,27 +1303,6 @@ void ocfs2_evict_inode(struct inode *inode)
ocfs2_clear_inode(inode);
}
-/* Called under inode_lock, with no more references on the
- * struct inode, so it's safe here to check the flags field
- * and to manipulate i_nlink without any other locks. */
-int ocfs2_drop_inode(struct inode *inode)
-{
- struct ocfs2_inode_info *oi = OCFS2_I(inode);
-
- trace_ocfs2_drop_inode((unsigned long long)oi->ip_blkno,
- inode->i_nlink, oi->ip_flags);
-
- assert_spin_locked(&inode->i_lock);
- inode->i_state |= I_WILL_FREE;
- spin_unlock(&inode->i_lock);
- write_inode_now(inode, 1);
- spin_lock(&inode->i_lock);
- WARN_ON(inode->i_state & I_NEW);
- inode->i_state &= ~I_WILL_FREE;
-
- return 1;
-}
-
/*
* This is called from our getattr.
*/
@@ -1312,12 +1374,12 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
fe->i_uid = cpu_to_le32(i_uid_read(inode));
fe->i_gid = cpu_to_le32(i_gid_read(inode));
fe->i_mode = cpu_to_le16(inode->i_mode);
- fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
- fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
- fe->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
- fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
- fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
- fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ fe->i_atime = cpu_to_le64(inode_get_atime_sec(inode));
+ fe->i_atime_nsec = cpu_to_le32(inode_get_atime_nsec(inode));
+ fe->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
+ fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
+ fe->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode));
+ fe->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode));
ocfs2_journal_dirty(handle, bh);
ocfs2_update_inode_fsync_trans(handle, inode, 1);
@@ -1348,10 +1410,10 @@ void ocfs2_refresh_inode(struct inode *inode,
inode->i_blocks = 0;
else
inode->i_blocks = ocfs2_inode_sector_count(inode);
- inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime);
- inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
- inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
- inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
+ inode_set_atime(inode, le64_to_cpu(fe->i_atime),
+ le32_to_cpu(fe->i_atime_nsec));
+ inode_set_mtime(inode, le64_to_cpu(fe->i_mtime),
+ le32_to_cpu(fe->i_mtime_nsec));
inode_set_ctime(inode, le64_to_cpu(fe->i_ctime),
le32_to_cpu(fe->i_ctime_nsec));
@@ -1400,7 +1462,7 @@ int ocfs2_validate_inode_block(struct super_block *sb,
goto bail;
}
- if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+ if (!(le32_to_cpu(di->i_flags) & OCFS2_VALID_FL)) {
rc = ocfs2_error(sb,
"Invalid dinode #%llu: OCFS2_VALID_FL not set\n",
(unsigned long long)bh->b_blocknr);
@@ -1416,6 +1478,49 @@ int ocfs2_validate_inode_block(struct super_block *sb,
goto bail;
}
+ if (le16_to_cpu(di->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
+ (u32)le16_to_cpu(di->i_suballoc_slot) > OCFS2_SB(sb)->max_slots - 1) {
+ rc = ocfs2_error(sb, "Invalid dinode %llu: suballoc slot %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(di->i_suballoc_slot));
+ goto bail;
+ }
+
+ if ((le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) &&
+ le32_to_cpu(di->i_clusters)) {
+ rc = ocfs2_error(sb, "Invalid dinode %llu: %u clusters\n",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(di->i_clusters));
+ goto bail;
+ }
+
+ if (le32_to_cpu(di->i_flags) & OCFS2_CHAIN_FL) {
+ struct ocfs2_chain_list *cl = &di->id2.i_chain;
+ u16 bpc = 1 << (OCFS2_SB(sb)->s_clustersize_bits -
+ sb->s_blocksize_bits);
+
+ if (le16_to_cpu(cl->cl_count) != ocfs2_chain_recs_per_inode(sb)) {
+ rc = ocfs2_error(sb, "Invalid dinode %llu: chain list count %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(cl->cl_count));
+ goto bail;
+ }
+ if (le16_to_cpu(cl->cl_next_free_rec) > le16_to_cpu(cl->cl_count)) {
+ rc = ocfs2_error(sb, "Invalid dinode %llu: chain list index %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(cl->cl_next_free_rec));
+ goto bail;
+ }
+ if (OCFS2_SB(sb)->bitmap_blkno &&
+ OCFS2_SB(sb)->bitmap_blkno != le64_to_cpu(di->i_blkno) &&
+ le16_to_cpu(cl->cl_bpc) != bpc) {
+ rc = ocfs2_error(sb, "Invalid dinode %llu: bits per cluster %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(cl->cl_bpc));
+ goto bail;
+ }
+ }
+
rc = 0;
bail:
@@ -1437,7 +1542,7 @@ static int ocfs2_filecheck_validate_inode_block(struct super_block *sb,
* Call ocfs2_validate_meta_ecc() first since it has ecc repair
* function, but we should not return error immediately when ecc
* validation fails, because the reason is quite likely the invalid
- * inode number inputed.
+ * inode number inputted.
*/
rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
if (rc) {
@@ -1547,6 +1652,16 @@ static int ocfs2_filecheck_repair_inode_block(struct super_block *sb,
le32_to_cpu(di->i_fs_generation));
}
+ if (ocfs2_dinode_has_extents(di) &&
+ le16_to_cpu(di->id2.i_list.l_next_free_rec) > le16_to_cpu(di->id2.i_list.l_count)) {
+ di->id2.i_list.l_next_free_rec = di->id2.i_list.l_count;
+ changed = 1;
+ mlog(ML_ERROR,
+ "Filecheck: reset dinode #%llu: l_next_free_rec to %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(di->id2.i_list.l_next_free_rec));
+ }
+
if (changed || ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check)) {
ocfs2_compute_meta_ecc(sb, bh->b_data, &di->i_check);
mark_buffer_dirty(bh);
@@ -1593,6 +1708,8 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno,
1, &tmp, flags, ocfs2_validate_inode_block);
+ if (rc < 0)
+ make_bad_inode(inode);
/* If ocfs2_read_blocks() got us a new bh, pass it up. */
if (!rc && !*bh)
*bh = tmp;
@@ -1621,6 +1738,7 @@ static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info
}
static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
+__acquires(&oi->ip_lock)
{
struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
@@ -1628,6 +1746,7 @@ static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
}
static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci)
+__releases(&oi->ip_lock)
{
struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 82b28fdacc7e..07bd838e7843 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -65,7 +65,7 @@ struct ocfs2_inode_info
tid_t i_sync_tid;
tid_t i_datasync_tid;
- struct dquot *i_dquot[MAXQUOTAS];
+ struct dquot __rcu *i_dquot[MAXQUOTAS];
};
/*
@@ -116,7 +116,6 @@ static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode)
}
void ocfs2_evict_inode(struct inode *inode);
-int ocfs2_drop_inode(struct inode *inode);
/* Flags for ocfs2_iget() */
#define OCFS2_FI_FLAG_SYSFILE 0x1
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index b1550ba73f96..b6864602814c 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -62,7 +62,7 @@ static inline int o2info_coherent(struct ocfs2_info_request *req)
return (!(req->ir_flags & OCFS2_INFO_FL_NON_COHERENT));
}
-int ocfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa)
+int ocfs2_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
{
struct inode *inode = d_inode(dentry);
unsigned int flags;
@@ -83,7 +83,7 @@ int ocfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa)
}
int ocfs2_fileattr_set(struct mnt_idmap *idmap,
- struct dentry *dentry, struct fileattr *fa)
+ struct dentry *dentry, struct file_kattr *fa)
{
struct inode *inode = d_inode(dentry);
unsigned int flags = fa->flags;
@@ -125,6 +125,7 @@ int ocfs2_fileattr_set(struct mnt_idmap *idmap,
ocfs2_inode->ip_attr = flags;
ocfs2_set_inode_flags(inode);
+ inode_set_ctime_current(inode);
status = ocfs2_mark_inode_dirty(handle, inode, bh);
if (status < 0)
@@ -357,13 +358,11 @@ static int ocfs2_info_handle_freeinode(struct inode *inode,
goto bail;
}
} else {
- ocfs2_sprintf_system_inode_name(namebuf,
- sizeof(namebuf),
- type, i);
+ int len = ocfs2_sprintf_system_inode_name(namebuf,
+ sizeof(namebuf),
+ type, i);
status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
- namebuf,
- strlen(namebuf),
- &blkno);
+ namebuf, len, &blkno);
if (status < 0) {
status = -ENOENT;
goto bail;
@@ -650,12 +649,10 @@ static int ocfs2_info_handle_freefrag(struct inode *inode,
goto bail;
}
} else {
- ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type,
- OCFS2_INVALID_SLOT);
+ int len = ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf),
+ type, OCFS2_INVALID_SLOT);
status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
- namebuf,
- strlen(namebuf),
- &blkno);
+ namebuf, len, &blkno);
if (status < 0) {
status = -ENOENT;
goto bail;
@@ -795,7 +792,7 @@ bail:
/*
* OCFS2_IOC_INFO handles an array of requests passed from userspace.
*
- * ocfs2_info_handle() recevies a large info aggregation, grab and
+ * ocfs2_info_handle() receives a large info aggregation, grab and
* validate the request count from header, then break it into small
* pieces, later specific handlers can handle them one by one.
*
diff --git a/fs/ocfs2/ioctl.h b/fs/ocfs2/ioctl.h
index 48a5fdfe87a1..4a1c2313b429 100644
--- a/fs/ocfs2/ioctl.h
+++ b/fs/ocfs2/ioctl.h
@@ -11,9 +11,9 @@
#ifndef OCFS2_IOCTL_PROTO_H
#define OCFS2_IOCTL_PROTO_H
-int ocfs2_fileattr_get(struct dentry *dentry, struct fileattr *fa);
+int ocfs2_fileattr_get(struct dentry *dentry, struct file_kattr *fa);
int ocfs2_fileattr_set(struct mnt_idmap *idmap,
- struct dentry *dentry, struct fileattr *fa);
+ struct dentry *dentry, struct file_kattr *fa);
long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index e8e7d47265aa..85239807dec7 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -90,7 +90,7 @@ enum ocfs2_replay_state {
struct ocfs2_replay_map {
unsigned int rm_slots;
enum ocfs2_replay_state rm_state;
- unsigned char rm_replay_slots[];
+ unsigned char rm_replay_slots[] __counted_by(rm_slots);
};
static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
@@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
struct ocfs2_recovery_map *rm;
mutex_init(&osb->recovery_lock);
- osb->disable_recovery = 0;
+ osb->recovery_state = OCFS2_REC_ENABLED;
osb->recovery_thread_task = NULL;
init_waitqueue_head(&osb->recovery_event);
@@ -190,31 +190,53 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
return 0;
}
-/* we can't grab the goofy sem lock from inside wait_event, so we use
- * memory barriers to make sure that we'll see the null task before
- * being woken up */
static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
{
- mb();
return osb->recovery_thread_task != NULL;
}
-void ocfs2_recovery_exit(struct ocfs2_super *osb)
+static void ocfs2_recovery_disable(struct ocfs2_super *osb,
+ enum ocfs2_recovery_state state)
{
- struct ocfs2_recovery_map *rm;
-
- /* disable any new recovery threads and wait for any currently
- * running ones to exit. Do this before setting the vol_state. */
mutex_lock(&osb->recovery_lock);
- osb->disable_recovery = 1;
+ /*
+ * If recovery thread is not running, we can directly transition to
+ * final state.
+ */
+ if (!ocfs2_recovery_thread_running(osb)) {
+ osb->recovery_state = state + 1;
+ goto out_lock;
+ }
+ osb->recovery_state = state;
+ /* Wait for recovery thread to acknowledge state transition */
+ wait_event_cmd(osb->recovery_event,
+ !ocfs2_recovery_thread_running(osb) ||
+ osb->recovery_state >= state + 1,
+ mutex_unlock(&osb->recovery_lock),
+ mutex_lock(&osb->recovery_lock));
+out_lock:
mutex_unlock(&osb->recovery_lock);
- wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
- /* At this point, we know that no more recovery threads can be
- * launched, so wait for any recovery completion work to
- * complete. */
+ /*
+ * At this point we know that no more recovery work can be queued so
+ * wait for any recovery completion work to complete.
+ */
if (osb->ocfs2_wq)
flush_workqueue(osb->ocfs2_wq);
+}
+
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb)
+{
+ ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE);
+}
+
+void ocfs2_recovery_exit(struct ocfs2_super *osb)
+{
+ struct ocfs2_recovery_map *rm;
+
+ /* disable any new recovery threads and wait for any currently
+ * running ones to exit. Do this before setting the vol_state. */
+ ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE);
/*
* Now that recovery is shut down, and the osb is about to be
@@ -446,6 +468,23 @@ bail:
}
/*
+ * Make sure handle has at least 'nblocks' credits available. If it does not
+ * have that many credits available, we will try to extend the handle to have
+ * enough credits. If that fails, we will restart transaction to have enough
+ * credits. Similar notes regarding data consistency and locking implications
+ * as for ocfs2_extend_trans() apply here.
+ */
+int ocfs2_assure_trans_credits(handle_t *handle, int nblocks)
+{
+ int old_nblks = jbd2_handle_buffer_credits(handle);
+
+ trace_ocfs2_assure_trans_credits(old_nblks);
+ if (old_nblks >= nblocks)
+ return 0;
+ return ocfs2_extend_trans(handle, nblocks - old_nblks);
+}
+
+/*
* If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA.
* If that fails, restart the transaction & regain write access for the
* buffer head which is used for metadata modifications.
@@ -479,12 +518,6 @@ bail:
return status;
}
-
-struct ocfs2_triggers {
- struct jbd2_buffer_trigger_type ot_triggers;
- int ot_offset;
-};
-
static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
{
return container_of(triggers, struct ocfs2_triggers, ot_triggers);
@@ -548,85 +581,76 @@ static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh)
{
+ struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
+
mlog(ML_ERROR,
"ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
"bh->b_blocknr = %llu\n",
(unsigned long)bh,
(unsigned long long)bh->b_blocknr);
- ocfs2_error(bh->b_assoc_map->host->i_sb,
+ ocfs2_error(ot->sb,
"JBD2 has aborted our journal, ocfs2 cannot continue\n");
}
-static struct ocfs2_triggers di_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dinode, i_check),
-};
-
-static struct ocfs2_triggers eb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
-};
-
-static struct ocfs2_triggers rb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
-};
-
-static struct ocfs2_triggers gd_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
-};
-
-static struct ocfs2_triggers db_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_db_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
-};
+static void ocfs2_setup_csum_triggers(struct super_block *sb,
+ enum ocfs2_journal_trigger_type type,
+ struct ocfs2_triggers *ot)
+{
+ BUG_ON(type >= OCFS2_JOURNAL_TRIGGER_COUNT);
-static struct ocfs2_triggers xb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
-};
+ switch (type) {
+ case OCFS2_JTR_DI:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dinode, i_check);
+ break;
+ case OCFS2_JTR_EB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_extent_block, h_check);
+ break;
+ case OCFS2_JTR_RB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_refcount_block, rf_check);
+ break;
+ case OCFS2_JTR_GD:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_group_desc, bg_check);
+ break;
+ case OCFS2_JTR_DB:
+ ot->ot_triggers.t_frozen = ocfs2_db_frozen_trigger;
+ break;
+ case OCFS2_JTR_XB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_xattr_block, xb_check);
+ break;
+ case OCFS2_JTR_DQ:
+ ot->ot_triggers.t_frozen = ocfs2_dq_frozen_trigger;
+ break;
+ case OCFS2_JTR_DR:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check);
+ break;
+ case OCFS2_JTR_DL:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check);
+ break;
+ case OCFS2_JTR_NONE:
+ /* To make compiler happy... */
+ return;
+ }
-static struct ocfs2_triggers dq_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_dq_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
-};
+ ot->ot_triggers.t_abort = ocfs2_abort_trigger;
+ ot->sb = sb;
+}
-static struct ocfs2_triggers dr_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
-};
+void ocfs2_initialize_journal_triggers(struct super_block *sb,
+ struct ocfs2_triggers triggers[])
+{
+ enum ocfs2_journal_trigger_type type;
-static struct ocfs2_triggers dl_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
-};
+ for (type = OCFS2_JTR_DI; type < OCFS2_JOURNAL_TRIGGER_COUNT; type++)
+ ocfs2_setup_csum_triggers(sb, type, &triggers[type]);
+}
static int __ocfs2_journal_access(handle_t *handle,
struct ocfs2_caching_info *ci,
@@ -708,56 +732,91 @@ static int __ocfs2_journal_access(handle_t *handle,
int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DI],
+ type);
}
int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_EB],
+ type);
}
int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_RB],
type);
}
int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_GD],
+ type);
}
int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DB],
+ type);
}
int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_XB],
+ type);
}
int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DQ],
+ type);
}
int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DR],
+ type);
}
int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DL],
+ type);
}
int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
@@ -778,13 +837,15 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
if (!is_handle_aborted(handle)) {
journal_t *journal = handle->h_transaction->t_journal;
- mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
- "Aborting transaction and journal.\n");
+ mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed: "
+ "handle type %u started at line %u, credits %u/%u "
+ "errcode %d. Aborting transaction and journal.\n",
+ handle->h_type, handle->h_line_no,
+ handle->h_requested_credits,
+ jbd2_handle_buffer_credits(handle), status);
handle->h_err = status;
jbd2_journal_abort_handle(handle);
jbd2_journal_abort(journal, status);
- ocfs2_abort(bh->b_assoc_map->host->i_sb,
- "Journal already aborted.\n");
}
}
}
@@ -841,15 +902,8 @@ bail:
static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
- struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = mapping->nrpages * 2,
- .range_start = jinode->i_dirty_start,
- .range_end = jinode->i_dirty_end,
- };
-
- return filemap_fdatawrite_wbc(mapping, &wbc);
+ return filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
+ jinode->i_dirty_start, jinode->i_dirty_end);
}
int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
@@ -908,9 +962,9 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
/* call the kernels journal init function now */
j_journal = jbd2_journal_init_inode(inode);
- if (j_journal == NULL) {
+ if (IS_ERR(j_journal)) {
mlog(ML_ERROR, "Linux journal layer error\n");
- status = -EINVAL;
+ status = PTR_ERR(j_journal);
goto done;
}
@@ -1016,7 +1070,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
if (!igrab(inode))
BUG();
- num_running_trans = atomic_read(&(osb->journal->j_num_trans));
+ num_running_trans = atomic_read(&(journal->j_num_trans));
trace_ocfs2_journal_shutdown(num_running_trans);
/* Do a commit_cache here. It will flush our journal, *and*
@@ -1035,9 +1089,10 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
osb->commit_task = NULL;
}
- BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
+ BUG_ON(atomic_read(&(journal->j_num_trans)) != 0);
- if (ocfs2_mount_local(osb)) {
+ if (ocfs2_mount_local(osb) &&
+ (journal->j_journal->j_flags & JBD2_LOADED)) {
jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal, 0);
jbd2_journal_unlock_updates(journal->j_journal);
@@ -1209,7 +1264,7 @@ static int ocfs2_force_read_journal(struct inode *inode)
}
for (i = 0; i < p_blocks; i++, p_blkno++) {
- bh = __find_get_block(osb->sb->s_bdev, p_blkno,
+ bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno,
osb->sb->s_blocksize);
/* block not cached. */
if (!bh)
@@ -1432,6 +1487,18 @@ static int __ocfs2_recovery_thread(void *arg)
}
}
restart:
+ if (quota_enabled) {
+ mutex_lock(&osb->recovery_lock);
+ /* Confirm that recovery thread will no longer recover quotas */
+ if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) {
+ osb->recovery_state = OCFS2_REC_QUOTA_DISABLED;
+ wake_up(&osb->recovery_event);
+ }
+ if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED)
+ quota_enabled = 0;
+ mutex_unlock(&osb->recovery_lock);
+ }
+
status = ocfs2_super_lock(osb, 1);
if (status < 0) {
mlog_errno(status);
@@ -1529,27 +1596,29 @@ bail:
ocfs2_free_replay_slots(osb);
osb->recovery_thread_task = NULL;
- mb(); /* sync with ocfs2_recovery_thread_running */
+ if (osb->recovery_state == OCFS2_REC_WANT_DISABLE)
+ osb->recovery_state = OCFS2_REC_DISABLED;
wake_up(&osb->recovery_event);
mutex_unlock(&osb->recovery_lock);
- if (quota_enabled)
- kfree(rm_quota);
+ kfree(rm_quota);
return status;
}
void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
{
+ int was_set = -1;
+
mutex_lock(&osb->recovery_lock);
+ if (osb->recovery_state < OCFS2_REC_WANT_DISABLE)
+ was_set = ocfs2_recovery_map_set(osb, node_num);
trace_ocfs2_recovery_thread(node_num, osb->node_num,
- osb->disable_recovery, osb->recovery_thread_task,
- osb->disable_recovery ?
- -1 : ocfs2_recovery_map_set(osb, node_num));
+ osb->recovery_state, osb->recovery_thread_task, was_set);
- if (osb->disable_recovery)
+ if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE)
goto out;
if (osb->recovery_thread_task)
@@ -1684,9 +1753,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
}
journal = jbd2_journal_init_inode(inode);
- if (journal == NULL) {
+ if (IS_ERR(journal)) {
mlog(ML_ERROR, "Linux journal layer error\n");
- status = -EIO;
+ status = PTR_ERR(journal);
goto done;
}
@@ -1916,7 +1985,7 @@ bail:
/*
* Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some
- * randomness to the timeout to minimize multple nodes firing the timer at the
+ * randomness to the timeout to minimize multiple nodes firing the timer at the
* same time.
*/
static inline unsigned long ocfs2_orphan_scan_timeout(void)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 41c9fe7e62f9..6397170f302f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
int ocfs2_recovery_init(struct ocfs2_super *osb);
void ocfs2_recovery_exit(struct ocfs2_super *osb);
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb);
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
void ocfs2_free_replay_slots(struct ocfs2_super *osb);
@@ -243,6 +244,8 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb,
int ocfs2_commit_trans(struct ocfs2_super *osb,
handle_t *handle);
int ocfs2_extend_trans(handle_t *handle, int nblocks);
+int ocfs2_assure_trans_credits(handle_t *handle,
+ int nblocks);
int ocfs2_allocate_extend_trans(handle_t *handle,
int thresh);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index c803c10dd97e..d1aa04a5af1b 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -212,14 +212,15 @@ static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
unsigned int num_clusters)
{
- spin_lock(&osb->osb_lock);
- if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
- osb->local_alloc_state == OCFS2_LA_THROTTLED)
- if (num_clusters >= osb->local_alloc_default_bits) {
+ if (num_clusters >= osb->local_alloc_default_bits) {
+ spin_lock(&osb->osb_lock);
+ if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
+ osb->local_alloc_state == OCFS2_LA_THROTTLED) {
cancel_delayed_work(&osb->la_enable_wq);
osb->local_alloc_state = OCFS2_LA_ENABLED;
}
- spin_unlock(&osb->osb_lock);
+ spin_unlock(&osb->osb_lock);
+ }
}
void ocfs2_la_enable_worker(struct work_struct *work)
@@ -335,7 +336,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
"found = %u, set = %u, taken = %u, off = %u\n",
num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
le32_to_cpu(alloc->id1.bitmap1.i_total),
- OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
+ le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off));
status = -EINVAL;
goto bail;
@@ -863,14 +864,8 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
numfound = bitoff = startoff = 0;
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
- while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
- if (bitoff == left) {
- /* mlog(0, "bitoff (%d) == left", bitoff); */
- break;
- }
- /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
- "numfound = %d\n", bitoff, startoff, numfound);*/
-
+ while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) <
+ left) {
/* Ok, we found a zero bit... is it contig. or do we
* start over?*/
if (bitoff == startoff) {
@@ -976,8 +971,8 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
start = count = 0;
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
- while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
- != -1) {
+ while (1) {
+ bit_off = ocfs2_find_next_zero_bit(bitmap, left, start);
if ((bit_off < left) && (bit_off == start)) {
count++;
start++;
@@ -1002,6 +997,7 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
goto bail;
}
}
+
if (bit_off >= left)
break;
count = 1;
@@ -1220,7 +1216,7 @@ retry_enospc:
OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
trace_ocfs2_local_alloc_new_window_result(
- OCFS2_LOCAL_ALLOC(alloc)->la_bm_off,
+ le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off),
le32_to_cpu(alloc->id1.bitmap1.i_total));
bail:
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index f37174e79fad..6de944818c56 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -27,7 +27,7 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode,
struct ocfs2_file_private *fp = file->private_data;
struct ocfs2_lock_res *lockres = &fp->fp_flock;
- if (fl->fl_type == F_WRLCK)
+ if (lock_is_write(fl))
level = 1;
if (!IS_SETLKW(cmd))
trylock = 1;
@@ -53,8 +53,8 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode,
*/
locks_init_lock(&request);
- request.fl_type = F_UNLCK;
- request.fl_flags = FL_FLOCK;
+ request.c.flc_type = F_UNLCK;
+ request.c.flc_flags = FL_FLOCK;
locks_lock_file_wait(file, &request);
ocfs2_file_unlock(file);
@@ -100,14 +100,14 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- if (!(fl->fl_flags & FL_FLOCK))
+ if (!(fl->c.flc_flags & FL_FLOCK))
return -ENOLCK;
if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
ocfs2_mount_local(osb))
return locks_lock_file_wait(file, fl);
- if (fl->fl_type == F_UNLCK)
+ if (lock_is_unlock(fl))
return ocfs2_do_funlock(file, cmd, fl);
else
return ocfs2_do_flock(file, inode, cmd, fl);
@@ -118,7 +118,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
struct inode *inode = file->f_mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- if (!(fl->fl_flags & FL_POSIX))
+ if (!(fl->c.flc_flags & FL_POSIX))
return -ENOLCK;
return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 1834f26522ed..50e2faf64c19 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -44,16 +44,16 @@ static vm_fault_t ocfs2_fault(struct vm_fault *vmf)
}
static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
- struct buffer_head *di_bh, struct page *page)
+ struct buffer_head *di_bh, struct folio *folio)
{
int err;
vm_fault_t ret = VM_FAULT_NOPAGE;
struct inode *inode = file_inode(file);
struct address_space *mapping = inode->i_mapping;
- loff_t pos = page_offset(page);
+ loff_t pos = folio_pos(folio);
unsigned int len = PAGE_SIZE;
pgoff_t last_index;
- struct page *locked_page = NULL;
+ struct folio *locked_folio = NULL;
void *fsdata;
loff_t size = i_size_read(inode);
@@ -72,9 +72,9 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
*
* Let VM retry with these cases.
*/
- if ((page->mapping != inode->i_mapping) ||
- (!PageUptodate(page)) ||
- (page_offset(page) >= size))
+ if ((folio->mapping != inode->i_mapping) ||
+ !folio_test_uptodate(folio) ||
+ (pos >= size))
goto out;
/*
@@ -87,11 +87,11 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
* worry about ocfs2_write_begin() skipping some buffer reads
* because the "write" would invalidate their data.
*/
- if (page->index == last_index)
+ if (folio->index == last_index)
len = ((size - 1) & ~PAGE_MASK) + 1;
err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
- &locked_page, &fsdata, di_bh, page);
+ &locked_folio, &fsdata, di_bh, folio);
if (err) {
if (err != -ENOSPC)
mlog_errno(err);
@@ -99,7 +99,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
goto out;
}
- if (!locked_page) {
+ if (!locked_folio) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -112,7 +112,7 @@ out:
static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
struct buffer_head *di_bh = NULL;
sigset_t oldset;
@@ -141,7 +141,7 @@ static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
- ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page);
+ ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, folio);
up_write(&OCFS2_I(inode)->ip_alloc_sem);
@@ -159,8 +159,9 @@ static const struct vm_operations_struct ocfs2_file_vm_ops = {
.page_mkwrite = ocfs2_page_mkwrite,
};
-int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
+int ocfs2_mmap_prepare(struct vm_area_desc *desc)
{
+ struct file *file = desc->file;
int ret = 0, lock_level = 0;
ret = ocfs2_inode_lock_atime(file_inode(file),
@@ -171,7 +172,7 @@ int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
}
ocfs2_inode_unlock(file_inode(file), lock_level);
out:
- vma->vm_ops = &ocfs2_file_vm_ops;
+ desc->vm_ops = &ocfs2_file_vm_ops;
return 0;
}
diff --git a/fs/ocfs2/mmap.h b/fs/ocfs2/mmap.h
index 1051507cc684..d21c30de6b8c 100644
--- a/fs/ocfs2/mmap.h
+++ b/fs/ocfs2/mmap.h
@@ -2,6 +2,6 @@
#ifndef OCFS2_MMAP_H
#define OCFS2_MMAP_H
-int ocfs2_mmap(struct file *file, struct vm_area_struct *vma);
+int ocfs2_mmap_prepare(struct vm_area_desc *desc);
#endif /* OCFS2_MMAP_H */
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 05d67968a3a9..ce978a2497d9 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -98,7 +98,13 @@ static int __ocfs2_move_extent(handle_t *handle,
rec = &el->l_recs[index];
- BUG_ON(ext_flags != rec->e_flags);
+ if (ext_flags != rec->e_flags) {
+ ret = ocfs2_error(inode->i_sb,
+ "Inode %llu has corrupted extent %d with flags 0x%x at cpos %u\n",
+ (unsigned long long)ino, index, rec->e_flags, cpos);
+ goto out;
+ }
+
/*
* after moving/defraging to new location, the extent is not going
* to be refcounted anymore.
@@ -364,7 +370,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
int *vict_bit,
struct buffer_head **ret_bh)
{
- int ret, i, bits_per_unit = 0;
+ int ret, i, len, bits_per_unit = 0;
u64 blkno;
char namebuf[40];
@@ -375,9 +381,9 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode,
struct ocfs2_dinode *ac_dinode;
struct ocfs2_group_desc *bg;
- ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
- ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
- strlen(namebuf), &blkno);
+ len = ocfs2_sprintf_system_inode_name(namebuf, sizeof(namebuf), type, slot);
+ ret = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf, len, &blkno);
+
if (ret) {
ret = -ENOENT;
goto out;
@@ -492,7 +498,7 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
bg = (struct ocfs2_group_desc *)gd_bh->b_data;
/*
- * moving goal is not allowd to start with a group desc blok(#0 blk)
+ * moving goal is not allowed to start with a group desc blok(#0 blk)
* let's compromise to the latter cluster.
*/
if (range->me_goal == le64_to_cpu(bg->bg_blkno))
@@ -617,6 +623,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
*/
credits += OCFS2_INODE_UPDATE_CREDITS + 1;
+ inode_lock(tl_inode);
+
/*
* ocfs2_move_extent() didn't reserve any clusters in lock_allocators()
* logic, while we still need to lock the global_bitmap.
@@ -626,7 +634,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
if (!gb_inode) {
mlog(ML_ERROR, "unable to get global_bitmap inode\n");
ret = -EIO;
- goto out;
+ goto out_unlock_tl_inode;
}
inode_lock(gb_inode);
@@ -634,16 +642,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
if (ret) {
mlog_errno(ret);
- goto out_unlock_gb_mutex;
+ goto out_unlock_gb_inode;
}
- inode_lock(tl_inode);
-
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
- goto out_unlock_tl_inode;
+ goto out_unlock;
}
new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
@@ -658,7 +664,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
/*
* probe the victim cluster group to find a proper
- * region to fit wanted movement, it even will perfrom
+ * region to fit wanted movement, it even will perform
* a best-effort attempt by compromising to a threshold
* around the goal.
*/
@@ -685,7 +691,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
}
ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
- goal_bit, len);
+ goal_bit, len, 0, 0);
if (ret) {
ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len,
le16_to_cpu(gd->bg_chain));
@@ -703,15 +709,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
out_commit:
ocfs2_commit_trans(osb, handle);
brelse(gd_bh);
-
-out_unlock_tl_inode:
- inode_unlock(tl_inode);
-
+out_unlock:
ocfs2_inode_unlock(gb_inode, 1);
-out_unlock_gb_mutex:
+out_unlock_gb_inode:
inode_unlock(gb_inode);
brelse(gb_bh);
iput(gb_inode);
+out_unlock_tl_inode:
+ inode_unlock(tl_inode);
out:
if (context->meta_ac) {
@@ -868,6 +873,11 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
mlog_errno(ret);
goto out;
}
+ /*
+ * Invalidate extent cache after moving/defragging to prevent
+ * stale cached data with outdated extent flags.
+ */
+ ocfs2_extent_map_trunc(inode, cpos);
context->clusters_moved += alloc_size;
next:
@@ -920,7 +930,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
}
/*
- * rememer ip_xattr_sem also needs to be held if necessary
+ * remember ip_xattr_sem also needs to be held if necessary
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
@@ -951,8 +961,8 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
di = (struct ocfs2_dinode *)di_bh->b_data;
inode_set_ctime_current(inode);
- di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
+ di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, di_bh);
@@ -1022,7 +1032,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
context->range = &range;
/*
- * ok, the default theshold for the defragmentation
+ * ok, the default threshold for the defragmentation
* is 1M, since our maximum clustersize was 1M also.
* any thought?
*/
@@ -1032,6 +1042,12 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
if (range.me_threshold > i_size_read(inode))
range.me_threshold = i_size_read(inode);
+ if (range.me_flags & ~(OCFS2_MOVE_EXT_FL_AUTO_DEFRAG |
+ OCFS2_MOVE_EXT_FL_PART_DEFRAG)) {
+ status = -EINVAL;
+ goto out_free;
+ }
+
if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) {
context->auto_defrag = 1;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 5cd6d7771cea..c90b254da75e 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -142,6 +142,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
bail_add:
ret = d_splice_alias(inode, dentry);
+ if (IS_ERR(ret))
+ goto bail_unlock;
if (inode) {
/*
@@ -154,15 +156,16 @@ bail_add:
* NOTE: This dentry already has ->d_op set from
* ocfs2_get_parent() and ocfs2_get_dentry()
*/
- if (!IS_ERR_OR_NULL(ret))
+ if (ret)
dentry = ret;
status = ocfs2_dentry_attach_lock(dentry, inode,
OCFS2_I(dir)->ip_blkno);
if (status) {
mlog_errno(status);
+ if (ret)
+ dput(ret);
ret = ERR_PTR(status);
- goto bail_unlock;
}
} else
ocfs2_dentry_attach_gen(dentry);
@@ -200,8 +203,10 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
mode = mode_strip_sgid(&nop_mnt_idmap, dir, mode);
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
status = dquot_initialize(inode);
- if (status)
+ if (status) {
+ iput(inode);
return ERR_PTR(status);
+ }
return inode;
}
@@ -506,7 +511,6 @@ static int __ocfs2_mknod_locked(struct inode *dir,
struct inode *inode,
dev_t dev,
struct buffer_head **new_fe_bh,
- struct buffer_head *parent_fe_bh,
handle_t *handle,
struct ocfs2_alloc_context *inode_ac,
u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit)
@@ -566,7 +570,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
fe->i_last_eb_blk = 0;
strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL);
- ktime_get_real_ts64(&ts);
+ ktime_get_coarse_real_ts64(&ts);
fe->i_atime = fe->i_ctime = fe->i_mtime =
cpu_to_le64(ts.tv_sec);
fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
@@ -639,14 +643,14 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
}
return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
- parent_fe_bh, handle, inode_ac,
- fe_blkno, suballoc_loc, suballoc_bit);
+ handle, inode_ac, fe_blkno,
+ suballoc_loc, suballoc_bit);
}
-static int ocfs2_mkdir(struct mnt_idmap *idmap,
- struct inode *dir,
- struct dentry *dentry,
- umode_t mode)
+static struct dentry *ocfs2_mkdir(struct mnt_idmap *idmap,
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode)
{
int ret;
@@ -656,7 +660,7 @@ static int ocfs2_mkdir(struct mnt_idmap *idmap,
if (ret)
mlog_errno(ret);
- return ret;
+ return ERR_PTR(ret);
}
static int ocfs2_create(struct mnt_idmap *idmap,
@@ -795,8 +799,9 @@ static int ocfs2_link(struct dentry *old_dentry,
inc_nlink(inode);
inode_set_ctime_current(inode);
ocfs2_set_links_count(fe, inode->i_nlink);
- fe->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
- fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
+ fe->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
+ fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, fe_bh);
err = ocfs2_add_entry(handle, dentry, inode,
@@ -993,9 +998,10 @@ static int ocfs2_unlink(struct inode *dir,
drop_nlink(inode);
drop_nlink(inode);
ocfs2_set_links_count(fe, inode->i_nlink);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, fe_bh);
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
if (S_ISDIR(inode->i_mode))
drop_nlink(dir);
@@ -1336,7 +1342,7 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
goto bail;
}
- if (S_ISDIR(old_inode->i_mode)) {
+ if (S_ISDIR(old_inode->i_mode) && new_dir != old_dir) {
u64 old_inode_parent;
update_dot_dot = 1;
@@ -1353,8 +1359,7 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
goto bail;
}
- if (!new_inode && new_dir != old_dir &&
- new_dir->i_nlink >= ocfs2_link_max(osb)) {
+ if (!new_inode && new_dir->i_nlink >= ocfs2_link_max(osb)) {
status = -EMLINK;
goto bail;
}
@@ -1450,8 +1455,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
trace_ocfs2_rename_over_existing(
- (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
- (unsigned long long)newfe_bh->b_blocknr : 0ULL);
+ (unsigned long long)newfe_blkno, newfe_bh,
+ (unsigned long long)newfe_bh->b_blocknr);
if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
@@ -1550,8 +1555,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
if (status >= 0) {
old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
- old_di->i_ctime = cpu_to_le64(inode_get_ctime(old_inode).tv_sec);
- old_di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(old_inode).tv_nsec);
+ old_di->i_ctime = cpu_to_le64(inode_get_ctime_sec(old_inode));
+ old_di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(old_inode));
ocfs2_journal_dirty(handle, old_inode_bh);
} else
mlog_errno(status);
@@ -1592,11 +1597,18 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
drop_nlink(new_inode);
inode_set_ctime_current(new_inode);
}
- old_dir->i_mtime = inode_set_ctime_current(old_dir);
+ inode_set_mtime_to_ts(old_dir, inode_set_ctime_current(old_dir));
if (update_dot_dot) {
status = ocfs2_update_entry(old_inode, handle,
&old_inode_dot_dot_res, new_dir);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
+ if (S_ISDIR(old_inode->i_mode)) {
drop_nlink(old_dir);
if (new_inode) {
drop_nlink(new_inode);
@@ -1614,8 +1626,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
if (old_dir != new_dir) {
/* Keep the same times on both directories.*/
- new_dir->i_mtime = inode_set_ctime_to_ts(new_dir,
- inode_get_ctime(old_dir));
+ inode_set_mtime_to_ts(new_dir,
+ inode_set_ctime_to_ts(new_dir, inode_get_ctime(old_dir)));
/*
* This will also pick up the i_nlink change from the
@@ -1636,6 +1648,10 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
INODE_CACHE(old_dir),
old_dir_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
ocfs2_set_links_count(fe, old_dir->i_nlink);
ocfs2_journal_dirty(handle, old_dir_bh);
@@ -2177,8 +2193,10 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
* @osb: ocfs2 file system
* @ret_orphan_dir: Orphan dir inode - returned locked!
* @blkno: Actual block number of the inode to be inserted into orphan dir.
+ * @name: Buffer to store the name of the orphan.
* @lookup: dir lookup result, to be passed back into functions like
* ocfs2_orphan_add
+ * @dio: Flag indicating if direct IO is being used or not.
*
* Returns zero on success and the ret_orphan_dir, name and lookup
* fields will be populated.
@@ -2560,7 +2578,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
clear_nlink(inode);
/* do the real work now. */
status = __ocfs2_mknod_locked(dir, inode,
- 0, &new_di_bh, parent_di_bh, handle,
+ 0, &new_di_bh, handle,
inode_ac, di_blkno, suballoc_loc,
suballoc_bit);
if (status < 0) {
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index a503c553bab2..6aaa94c554c1 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -154,7 +154,7 @@ struct ocfs2_lock_stats {
struct ocfs2_lock_res {
void *l_priv;
- struct ocfs2_lock_res_ops *l_ops;
+ const struct ocfs2_lock_res_ops *l_ops;
struct list_head l_blocked_list;
@@ -284,6 +284,45 @@ enum ocfs2_mount_options
#define OCFS2_OSB_ERROR_FS 0x0004
#define OCFS2_DEFAULT_ATIME_QUANTUM 60
+struct ocfs2_triggers {
+ struct jbd2_buffer_trigger_type ot_triggers;
+ int ot_offset;
+ struct super_block *sb;
+};
+
+enum ocfs2_journal_trigger_type {
+ OCFS2_JTR_DI,
+ OCFS2_JTR_EB,
+ OCFS2_JTR_RB,
+ OCFS2_JTR_GD,
+ OCFS2_JTR_DB,
+ OCFS2_JTR_XB,
+ OCFS2_JTR_DQ,
+ OCFS2_JTR_DR,
+ OCFS2_JTR_DL,
+ OCFS2_JTR_NONE /* This must be the last entry */
+};
+
+#define OCFS2_JOURNAL_TRIGGER_COUNT OCFS2_JTR_NONE
+
+void ocfs2_initialize_journal_triggers(struct super_block *sb,
+ struct ocfs2_triggers triggers[]);
+
+enum ocfs2_recovery_state {
+ OCFS2_REC_ENABLED = 0,
+ OCFS2_REC_QUOTA_WANT_DISABLE,
+ /*
+ * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for
+ * ocfs2_recovery_disable_quota() to work.
+ */
+ OCFS2_REC_QUOTA_DISABLED,
+ OCFS2_REC_WANT_DISABLE,
+ /*
+ * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work
+ */
+ OCFS2_REC_DISABLED,
+};
+
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
@@ -346,11 +385,14 @@ struct ocfs2_super
struct ocfs2_recovery_map *recovery_map;
struct ocfs2_replay_map *replay_map;
struct task_struct *recovery_thread_task;
- int disable_recovery;
+ enum ocfs2_recovery_state recovery_state;
wait_queue_head_t checkpoint_event;
struct ocfs2_journal *journal;
unsigned long osb_commit_interval;
+ /* Journal triggers for checksum */
+ struct ocfs2_triggers s_journal_triggers[OCFS2_JOURNAL_TRIGGER_COUNT];
+
struct delayed_work la_enable_wq;
/*
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 7aebdbf5cc0a..f7763da5c4a2 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -132,7 +132,7 @@
* well as the name of the cluster being joined.
* mount.ocfs2 must pass in a matching stack name.
*
- * If not set, the classic stack will be used. This is compatbile with
+ * If not set, the classic stack will be used. This is compatible with
* all older versions.
*/
#define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080
@@ -143,7 +143,7 @@
/* Support for extended attributes */
#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
-/* Support for indexed directores */
+/* Support for indexed directories */
#define OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS 0x0400
/* Metadata checksum and error correction */
@@ -156,7 +156,7 @@
#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000
/*
- * Incompat bit to indicate useable clusterinfo with stackflags for all
+ * Incompat bit to indicate usable clusterinfo with stackflags for all
* cluster stacks (userspace adnd o2cb). If this bit is set,
* INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set.
*/
@@ -468,7 +468,8 @@ struct ocfs2_extent_list {
__le16 l_reserved1;
__le64 l_reserved2; /* Pad to
sizeof(ocfs2_extent_rec) */
-/*10*/ struct ocfs2_extent_rec l_recs[]; /* Extent records */
+ /* Extent records */
+/*10*/ struct ocfs2_extent_rec l_recs[] __counted_by_le(l_count);
};
/*
@@ -482,7 +483,8 @@ struct ocfs2_chain_list {
__le16 cl_count; /* Total chains in this list */
__le16 cl_next_free_rec; /* Next unused chain slot */
__le64 cl_reserved1;
-/*10*/ struct ocfs2_chain_rec cl_recs[]; /* Chain records */
+ /* Chain records */
+/*10*/ struct ocfs2_chain_rec cl_recs[] __counted_by_le(cl_count);
};
/*
@@ -494,7 +496,8 @@ struct ocfs2_truncate_log {
/*00*/ __le16 tl_count; /* Total records in this log */
__le16 tl_used; /* Number of records in use */
__le32 tl_reserved1;
-/*08*/ struct ocfs2_truncate_rec tl_recs[]; /* Truncate records */
+ /* Truncate records */
+/*08*/ struct ocfs2_truncate_rec tl_recs[] __counted_by_le(tl_count);
};
/*
@@ -614,7 +617,7 @@ struct ocfs2_super_block {
__le16 s_reserved0;
__le32 s_dx_seed[3]; /* seed[0-2] for dx dir hash.
* s_uuid_hash serves as seed[3]. */
-/*C0*/ __le64 s_reserved2[15]; /* Fill out superblock */
+/*C8*/ __le64 s_reserved2[15]; /* Fill out superblock */
/*140*/
/*
@@ -796,9 +799,10 @@ struct ocfs2_dx_entry_list {
* possible in de_entries */
__le16 de_num_used; /* Current number of
* de_entries entries */
- struct ocfs2_dx_entry de_entries[]; /* Indexed dir entries
- * in a packed array of
- * length de_num_used */
+ /* Indexed dir entries in a packed
+ * array of length de_num_used.
+ */
+ struct ocfs2_dx_entry de_entries[] __counted_by_le(de_count);
};
#define OCFS2_DX_FLAG_INLINE 0x01
@@ -883,7 +887,8 @@ struct ocfs2_group_desc
__le16 bg_free_bits_count; /* Free bits count */
__le16 bg_chain; /* What chain I am in. */
/*10*/ __le32 bg_generation;
- __le32 bg_reserved1;
+ __le16 bg_contig_free_bits; /* max contig free bits length */
+ __le16 bg_reserved1;
__le64 bg_next_group; /* Next group in my list, in
blocks */
/*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in
@@ -933,7 +938,8 @@ struct ocfs2_refcount_list {
__le16 rl_used; /* Current number of used records */
__le32 rl_reserved2;
__le64 rl_reserved1; /* Pad to sizeof(ocfs2_refcount_record) */
-/*10*/ struct ocfs2_refcount_rec rl_recs[]; /* Refcount records */
+ /* Refcount records */
+/*10*/ struct ocfs2_refcount_rec rl_recs[] __counted_by_le(rl_count);
};
@@ -1019,7 +1025,8 @@ struct ocfs2_xattr_header {
buckets. A block uses
xb_check and sets
this field to zero.) */
- struct ocfs2_xattr_entry xh_entries[]; /* xattr entry list. */
+ /* xattr entry list. */
+ struct ocfs2_xattr_entry xh_entries[] __counted_by_le(xh_count);
};
/*
@@ -1082,7 +1089,7 @@ struct ocfs2_xattr_block {
struct ocfs2_xattr_header xb_header; /* xattr header if this
block contains xattr */
struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this
- block cotains xattr
+ block contains xattr
tree. */
} xb_attrs;
};
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index 9680797bc531..2de2f8733283 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -215,7 +215,7 @@ struct ocfs2_move_extents {
movement less likely
to fail, may make fs
even more fragmented */
-#define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmenation
+#define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmentation
completely gets done.
*/
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 8ac357ce6a30..9b234c03d693 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -93,7 +93,7 @@ static char *ocfs2_lock_type_strings[] = {
[OCFS2_LOCK_TYPE_DATA] = "Data",
[OCFS2_LOCK_TYPE_SUPER] = "Super",
[OCFS2_LOCK_TYPE_RENAME] = "Rename",
- /* Need to differntiate from [R]ename.. serializing writes is the
+ /* Need to differentiate from [R]ename.. serializing writes is the
* important job it does, anyway. */
[OCFS2_LOCK_TYPE_RW] = "Write/Read",
[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index ac4fd1d5b128..4b32fb5658ad 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -82,7 +82,7 @@ DECLARE_EVENT_CLASS(ocfs2__string,
__string(name,name)
),
TP_fast_assign(
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%s", __get_str(name))
);
@@ -1157,8 +1157,6 @@ DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_get_block_end);
DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_readpage);
-DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_writepage);
-
DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_bmap);
TRACE_EVENT(ocfs2_try_to_write_inline_data,
@@ -1291,7 +1289,7 @@ DECLARE_EVENT_CLASS(ocfs2__file_ops,
__entry->dentry = dentry;
__entry->ino = ino;
__entry->d_len = d_len;
- __assign_str(d_name, d_name);
+ __assign_str(d_name);
__entry->para = para;
),
TP_printk("%p %p %p %llu %llu %.*s", __entry->inode, __entry->file,
@@ -1427,7 +1425,7 @@ TRACE_EVENT(ocfs2_setattr,
__entry->dentry = dentry;
__entry->ino = ino;
__entry->d_len = d_len;
- __assign_str(d_name, d_name);
+ __assign_str(d_name);
__entry->ia_valid = ia_valid;
__entry->ia_mode = ia_mode;
__entry->ia_uid = ia_uid;
@@ -1571,8 +1569,6 @@ DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_delete_inode);
DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_clear_inode);
-DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_drop_inode);
-
TRACE_EVENT(ocfs2_inode_revalidate,
TP_PROTO(void *inode, unsigned long long ino,
unsigned int flags),
@@ -1660,34 +1656,34 @@ TRACE_EVENT(ocfs2_remount,
);
TRACE_EVENT(ocfs2_fill_super,
- TP_PROTO(void *sb, void *data, int silent),
- TP_ARGS(sb, data, silent),
+ TP_PROTO(void *sb, void *fc, int silent),
+ TP_ARGS(sb, fc, silent),
TP_STRUCT__entry(
__field(void *, sb)
- __field(void *, data)
+ __field(void *, fc)
__field(int, silent)
),
TP_fast_assign(
__entry->sb = sb;
- __entry->data = data;
+ __entry->fc = fc;
__entry->silent = silent;
),
TP_printk("%p %p %d", __entry->sb,
- __entry->data, __entry->silent)
+ __entry->fc, __entry->silent)
);
TRACE_EVENT(ocfs2_parse_options,
- TP_PROTO(int is_remount, char *options),
- TP_ARGS(is_remount, options),
+ TP_PROTO(int is_remount, const char *option),
+ TP_ARGS(is_remount, option),
TP_STRUCT__entry(
__field(int, is_remount)
- __string(options, options)
+ __string(option, option)
),
TP_fast_assign(
__entry->is_remount = is_remount;
- __assign_str(options, options);
+ __assign_str(option);
),
- TP_printk("%d %s", __entry->is_remount, __get_str(options))
+ TP_printk("%d %s", __entry->is_remount, __get_str(option))
);
DEFINE_OCFS2_POINTER_EVENT(ocfs2_put_super);
@@ -1720,8 +1716,8 @@ TRACE_EVENT(ocfs2_initialize_super,
__field(int, cluster_bits)
),
TP_fast_assign(
- __assign_str(label, label);
- __assign_str(uuid_str, uuid_str);
+ __assign_str(label);
+ __assign_str(uuid_str);
__entry->root_dir = root_dir;
__entry->system_dir = system_dir;
__entry->cluster_bits = cluster_bits;
@@ -1748,7 +1744,7 @@ TRACE_EVENT(ocfs2_init_xattr_set_ctxt,
__field(int, credits)
),
TP_fast_assign(
- __assign_str(name, name);
+ __assign_str(name);
__entry->meta = meta;
__entry->clusters = clusters;
__entry->credits = credits;
@@ -1772,7 +1768,7 @@ DECLARE_EVENT_CLASS(ocfs2__xattr_find,
),
TP_fast_assign(
__entry->ino = ino;
- __assign_str(name, name);
+ __assign_str(name);
__entry->name_index = name_index;
__entry->hash = hash;
__entry->location = location;
@@ -2021,7 +2017,7 @@ TRACE_EVENT(ocfs2_sync_dquot_helper,
__entry->dq_id = dq_id;
__entry->dq_type = dq_type;
__entry->type = type;
- __assign_str(s_id, s_id);
+ __assign_str(s_id);
),
TP_printk("%u %u %lu %s", __entry->dq_id, __entry->dq_type,
__entry->type, __get_str(s_id))
@@ -2062,7 +2058,7 @@ TRACE_EVENT(ocfs2_dx_dir_search,
TP_fast_assign(
__entry->ino = ino;
__entry->namelen = namelen;
- __assign_str(name, name);
+ __assign_str(name);
__entry->major_hash = major_hash;
__entry->minor_hash = minor_hash;
__entry->blkno = blkno;
@@ -2090,7 +2086,7 @@ TRACE_EVENT(ocfs2_find_files_on_disk,
),
TP_fast_assign(
__entry->namelen = namelen;
- __assign_str(name, name);
+ __assign_str(name);
__entry->blkno = blkno;
__entry->dir = dir;
),
@@ -2109,7 +2105,7 @@ TRACE_EVENT(ocfs2_check_dir_for_entry,
TP_fast_assign(
__entry->dir = dir;
__entry->namelen = namelen;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%llu %.*s", __entry->dir,
__entry->namelen, __get_str(name))
@@ -2137,7 +2133,7 @@ TRACE_EVENT(ocfs2_dx_dir_index_root_block,
__entry->major_hash = major_hash;
__entry->minor_hash = minor_hash;
__entry->namelen = namelen;
- __assign_str(name, name);
+ __assign_str(name);
__entry->num_used = num_used;
),
TP_printk("%llu %x %x %.*s %u", __entry->dir,
@@ -2173,7 +2169,7 @@ DECLARE_EVENT_CLASS(ocfs2__dentry_ops,
__entry->dir = dir;
__entry->dentry = dentry;
__entry->name_len = name_len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->dir_blkno = dir_blkno;
__entry->extra = extra;
),
@@ -2219,7 +2215,7 @@ TRACE_EVENT(ocfs2_mknod,
__entry->dir = dir;
__entry->dentry = dentry;
__entry->name_len = name_len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->dir_blkno = dir_blkno;
__entry->dev = dev;
__entry->mode = mode;
@@ -2243,9 +2239,9 @@ TRACE_EVENT(ocfs2_link,
TP_fast_assign(
__entry->ino = ino;
__entry->old_len = old_len;
- __assign_str(old_name, old_name);
+ __assign_str(old_name);
__entry->name_len = name_len;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%llu %.*s %.*s", __entry->ino,
__entry->old_len, __get_str(old_name),
@@ -2281,9 +2277,9 @@ TRACE_EVENT(ocfs2_rename,
__entry->new_dir = new_dir;
__entry->new_dentry = new_dentry;
__entry->old_len = old_len;
- __assign_str(old_name, old_name);
+ __assign_str(old_name);
__entry->new_len = new_len;
- __assign_str(new_name, new_name);
+ __assign_str(new_name);
),
TP_printk("%p %p %p %p %.*s %.*s",
__entry->old_dir, __entry->old_dentry,
@@ -2303,7 +2299,7 @@ TRACE_EVENT(ocfs2_rename_target_exists,
),
TP_fast_assign(
__entry->new_len = new_len;
- __assign_str(new_name, new_name);
+ __assign_str(new_name);
),
TP_printk("%.*s", __entry->new_len, __get_str(new_name))
);
@@ -2346,7 +2342,7 @@ TRACE_EVENT(ocfs2_symlink_begin,
__entry->dentry = dentry;
__entry->symname = symname;
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%p %p %s %.*s", __entry->dir, __entry->dentry,
__entry->symname, __entry->len, __get_str(name))
@@ -2362,7 +2358,7 @@ TRACE_EVENT(ocfs2_blkno_stringify,
),
TP_fast_assign(
__entry->blkno = blkno;
- __assign_str(name, name);
+ __assign_str(name);
__entry->namelen = namelen;
),
TP_printk("%llu %s %d", __entry->blkno, __get_str(name),
@@ -2383,7 +2379,7 @@ TRACE_EVENT(ocfs2_orphan_del,
),
TP_fast_assign(
__entry->dir = dir;
- __assign_str(name, name);
+ __assign_str(name);
__entry->namelen = namelen;
),
TP_printk("%llu %s %d", __entry->dir, __get_str(name),
@@ -2405,7 +2401,7 @@ TRACE_EVENT(ocfs2_dentry_revalidate,
TP_fast_assign(
__entry->dentry = dentry;
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%p %.*s", __entry->dentry, __entry->len, __get_str(name))
);
@@ -2422,7 +2418,7 @@ TRACE_EVENT(ocfs2_dentry_revalidate_negative,
),
TP_fast_assign(
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->pgen = pgen;
__entry->gen = gen;
),
@@ -2447,7 +2443,7 @@ TRACE_EVENT(ocfs2_find_local_alias,
),
TP_fast_assign(
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%.*s", __entry->len, __get_str(name))
);
@@ -2464,7 +2460,7 @@ TRACE_EVENT(ocfs2_dentry_attach_lock,
),
TP_fast_assign(
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->parent = parent;
__entry->fsdata = fsdata;
),
@@ -2482,7 +2478,7 @@ TRACE_EVENT(ocfs2_dentry_attach_lock_found,
__field(unsigned long long, ino)
),
TP_fast_assign(
- __assign_str(name, name);
+ __assign_str(name);
__entry->parent = parent;
__entry->ino = ino;
),
@@ -2529,7 +2525,7 @@ TRACE_EVENT(ocfs2_get_parent,
TP_fast_assign(
__entry->child = child;
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->ino = ino;
),
TP_printk("%p %.*s %llu", __entry->child, __entry->len,
@@ -2553,7 +2549,7 @@ TRACE_EVENT(ocfs2_encode_fh_begin,
TP_fast_assign(
__entry->dentry = dentry;
__entry->name_len = name_len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->fh = fh;
__entry->len = len;
__entry->connectable = connectable;
@@ -2579,6 +2575,8 @@ DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_commit_cache_end);
DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans);
+DEFINE_OCFS2_INT_EVENT(ocfs2_assure_trans_credits);
+
DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart);
DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans);
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index ebb5c99f490e..788a8de922a4 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -97,7 +97,6 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
int ocfs2_global_read_info(struct super_block *sb, int type);
int ocfs2_global_write_info(struct super_block *sb, int type);
-int ocfs2_global_read_dquot(struct dquot *dquot);
int __ocfs2_sync_dquot(struct dquot *dquot, int freeing);
static inline int ocfs2_sync_dquot(struct dquot *dquot)
{
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index dc9f76ab7e13..e85b1ccf81be 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -273,7 +273,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
if (new)
memset(bh->b_data, 0, sb->s_blocksize);
memcpy(bh->b_data + offset, data, len);
- flush_dcache_page(bh->b_page);
+ flush_dcache_folio(bh->b_folio);
set_buffer_uptodate(bh);
unlock_buffer(bh);
ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh);
@@ -371,12 +371,16 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
status = ocfs2_extent_map_get_blocks(oinfo->dqi_gqinode, 0, &oinfo->dqi_giblk,
&pcount, NULL);
- if (status < 0)
+ if (status < 0) {
+ mlog_errno(status);
goto out_unlock;
+ }
status = ocfs2_qinfo_lock(oinfo, 0);
- if (status < 0)
+ if (status < 0) {
+ mlog_errno(status);
goto out_unlock;
+ }
status = sb->s_op->quota_read(sb, type, (char *)&dinfo,
sizeof(struct ocfs2_global_disk_dqinfo),
OCFS2_GLOBAL_INFO_OFF);
@@ -404,12 +408,11 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
schedule_delayed_work(&oinfo->dqi_sync_work,
msecs_to_jiffies(oinfo->dqi_syncms));
-out_err:
- return status;
+ return 0;
out_unlock:
ocfs2_unlock_global_qf(oinfo, 0);
- mlog_errno(status);
- goto out_err;
+out_err:
+ return status;
}
/* Write information to global quota file. Expects exclusive lock on quota
@@ -447,14 +450,17 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
int err;
struct quota_info *dqopt = sb_dqopt(sb);
struct ocfs2_mem_dqinfo *info = dqopt->info[type].dqi_priv;
+ unsigned int memalloc;
down_write(&dqopt->dqio_sem);
+ memalloc = memalloc_nofs_save();
err = ocfs2_qinfo_lock(info, 1);
if (err < 0)
goto out_sem;
err = __ocfs2_global_write_info(sb, type);
ocfs2_qinfo_unlock(info, 1);
out_sem:
+ memalloc_nofs_restore(memalloc);
up_write(&dqopt->dqio_sem);
return err;
}
@@ -601,6 +607,7 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
struct ocfs2_super *osb = OCFS2_SB(sb);
int status = 0;
+ unsigned int memalloc;
trace_ocfs2_sync_dquot_helper(from_kqid(&init_user_ns, dquot->dq_id),
dquot->dq_id.type,
@@ -618,6 +625,7 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
goto out_ilock;
}
down_write(&sb_dqopt(sb)->dqio_sem);
+ memalloc = memalloc_nofs_save();
status = ocfs2_sync_dquot(dquot);
if (status < 0)
mlog_errno(status);
@@ -625,6 +633,7 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type)
status = ocfs2_local_write_dquot(dquot);
if (status < 0)
mlog_errno(status);
+ memalloc_nofs_restore(memalloc);
up_write(&sb_dqopt(sb)->dqio_sem);
ocfs2_commit_trans(osb, handle);
out_ilock:
@@ -662,6 +671,7 @@ static int ocfs2_write_dquot(struct dquot *dquot)
handle_t *handle;
struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
int status = 0;
+ unsigned int memalloc;
trace_ocfs2_write_dquot(from_kqid(&init_user_ns, dquot->dq_id),
dquot->dq_id.type);
@@ -673,7 +683,9 @@ static int ocfs2_write_dquot(struct dquot *dquot)
goto out;
}
down_write(&sb_dqopt(dquot->dq_sb)->dqio_sem);
+ memalloc = memalloc_nofs_save();
status = ocfs2_local_write_dquot(dquot);
+ memalloc_nofs_restore(memalloc);
up_write(&sb_dqopt(dquot->dq_sb)->dqio_sem);
ocfs2_commit_trans(osb, handle);
out:
@@ -749,6 +761,11 @@ static int ocfs2_release_dquot(struct dquot *dquot)
handle = ocfs2_start_trans(osb,
ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_id.type));
if (IS_ERR(handle)) {
+ /*
+ * Mark dquot as inactive to avoid endless cycle in
+ * quota_release_workfn().
+ */
+ clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
status = PTR_ERR(handle);
mlog_errno(status);
goto out_ilock;
@@ -881,7 +898,7 @@ static int ocfs2_get_next_id(struct super_block *sb, struct kqid *qid)
int status = 0;
trace_ocfs2_get_next_id(from_kqid(&init_user_ns, *qid), type);
- if (!sb_has_quota_loaded(sb, type)) {
+ if (!sb_has_quota_active(sb, type)) {
status = -ESRCH;
goto out;
}
@@ -920,6 +937,7 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
handle_t *handle;
struct ocfs2_super *osb = OCFS2_SB(sb);
+ unsigned int memalloc;
trace_ocfs2_mark_dquot_dirty(from_kqid(&init_user_ns, dquot->dq_id),
type);
@@ -946,6 +964,7 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
goto out_ilock;
}
down_write(&sb_dqopt(sb)->dqio_sem);
+ memalloc = memalloc_nofs_save();
status = ocfs2_sync_dquot(dquot);
if (status < 0) {
mlog_errno(status);
@@ -954,6 +973,7 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
/* Now write updated local dquot structure */
status = ocfs2_local_write_dquot(dquot);
out_dlock:
+ memalloc_nofs_restore(memalloc);
up_write(&sb_dqopt(sb)->dqio_sem);
ocfs2_commit_trans(osb, handle);
out_ilock:
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index dfaae1e52412..de7f12858729 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -453,8 +453,7 @@ out:
/* Sync changes in local quota file into global quota file and
* reinitialize local quota file.
- * The function expects local quota file to be already locked and
- * s_umount locked in shared mode. */
+ * The function expects local quota file to be already locked. */
static int ocfs2_recover_local_quota_file(struct inode *lqinode,
int type,
struct ocfs2_quota_recovery *rec)
@@ -470,6 +469,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
int bit, chunk;
struct ocfs2_recovery_chunk *rchunk, *next;
qsize_t spacechange, inodechange;
+ unsigned int memalloc;
trace_ocfs2_recover_local_quota_file((unsigned long)lqinode->i_ino, type);
@@ -521,6 +521,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
goto out_drop_lock;
}
down_write(&sb_dqopt(sb)->dqio_sem);
+ memalloc = memalloc_nofs_save();
spin_lock(&dquot->dq_dqb_lock);
/* Add usage from quota entry into quota changes
* of our node. Auxiliary variables are important
@@ -553,6 +554,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
unlock_buffer(qbh);
ocfs2_journal_dirty(handle, qbh);
out_commit:
+ memalloc_nofs_restore(memalloc);
up_write(&sb_dqopt(sb)->dqio_sem);
ocfs2_commit_trans(OCFS2_SB(sb), handle);
out_drop_lock:
@@ -585,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
{
unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
LOCAL_GROUP_QUOTA_SYSTEM_INODE };
- struct super_block *sb = osb->sb;
struct ocfs2_local_disk_dqinfo *ldinfo;
struct buffer_head *bh;
handle_t *handle;
@@ -597,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for "
"slot %u\n", osb->dev_str, slot_num);
- down_read(&sb->s_umount);
for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
if (list_empty(&(rec->r_list[type])))
continue;
@@ -674,8 +674,7 @@ out_put:
break;
}
out:
- up_read(&sb->s_umount);
- kfree(rec);
+ ocfs2_free_quota_recovery(rec);
return status;
}
@@ -689,7 +688,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
int status;
struct buffer_head *bh = NULL;
struct ocfs2_quota_recovery *rec;
- int locked = 0;
+ int locked = 0, global_read = 0;
info->dqi_max_spc_limit = 0x7fffffffffffffffLL;
info->dqi_max_ino_limit = 0x7fffffffffffffffLL;
@@ -697,6 +696,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
if (!oinfo) {
mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
" info.");
+ status = -ENOMEM;
goto out_err;
}
info->dqi_priv = oinfo;
@@ -709,6 +709,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
status = ocfs2_global_read_info(sb, type);
if (status < 0)
goto out_err;
+ global_read = 1;
status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1);
if (status < 0) {
@@ -779,10 +780,12 @@ out_err:
if (locked)
ocfs2_inode_unlock(lqinode, 1);
ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+ if (global_read)
+ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
kfree(oinfo);
}
brelse(bh);
- return -1;
+ return status;
}
/* Write local info to quota file */
@@ -836,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
/*
- * s_umount held in exclusive mode protects us against racing with
- * recovery thread...
+ * ocfs2_dismount_volume() has already aborted quota recovery...
*/
if (oinfo->dqi_rec) {
ocfs2_free_quota_recovery(oinfo->dqi_rec);
@@ -860,6 +862,7 @@ out:
brelse(oinfo->dqi_libh);
brelse(oinfo->dqi_lqi_bh);
kfree(oinfo);
+ info->dqi_priv = NULL;
return status;
}
@@ -1240,6 +1243,10 @@ int ocfs2_create_local_dquot(struct dquot *dquot)
&od->dq_local_phys_blk,
&pcount,
NULL);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
/* Initialize dquot structure on disk */
status = ocfs2_local_write_dquot(dquot);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 25c8ec3c8c3a..c92e0ea85bca 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -25,6 +25,7 @@
#include "namei.h"
#include "ocfs2_trace.h"
#include "file.h"
+#include "symlink.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -33,6 +34,7 @@
#include <linux/pagevec.h>
#include <linux/swap.h>
#include <linux/security.h>
+#include <linux/string.h>
#include <linux/fsnotify.h>
#include <linux/quotaops.h>
#include <linux/namei.h>
@@ -620,7 +622,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
/* Initialize ocfs2_refcount_block. */
rb = (struct ocfs2_refcount_block *)new_bh->b_data;
memset(rb, 0, inode->i_sb->s_blocksize);
- strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
+ strscpy(rb->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
@@ -630,7 +632,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
rb->rf_records.rl_count =
cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
spin_lock(&osb->osb_lock);
- rb->rf_generation = osb->s_next_generation++;
+ rb->rf_generation = cpu_to_le32(osb->s_next_generation++);
spin_unlock(&osb->osb_lock);
ocfs2_journal_dirty(handle, new_bh);
@@ -1392,13 +1394,6 @@ static int cmp_refcount_rec_by_cpos(const void *a, const void *b)
return 0;
}
-static void swap_refcount_rec(void *a, void *b, int size)
-{
- struct ocfs2_refcount_rec *l = a, *r = b;
-
- swap(*l, *r);
-}
-
/*
* The refcount cpos are ordered by their 64bit cpos,
* But we will use the low 32 bit to be the e_cpos in the b-tree.
@@ -1474,7 +1469,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
*/
sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
sizeof(struct ocfs2_refcount_rec),
- cmp_refcount_rec_by_low_cpos, swap_refcount_rec);
+ cmp_refcount_rec_by_low_cpos, NULL);
ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index);
if (ret) {
@@ -1499,11 +1494,11 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
sizeof(struct ocfs2_refcount_rec),
- cmp_refcount_rec_by_cpos, swap_refcount_rec);
+ cmp_refcount_rec_by_cpos, NULL);
sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used),
sizeof(struct ocfs2_refcount_rec),
- cmp_refcount_rec_by_cpos, swap_refcount_rec);
+ cmp_refcount_rec_by_cpos, NULL);
*split_cpos = cpos;
return 0;
@@ -1568,7 +1563,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
/* Initialize ocfs2_refcount_block. */
new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
memset(new_rb, 0, sb->s_blocksize);
- strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
+ strscpy(new_rb->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
@@ -2426,7 +2421,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
*
* If we will insert a new one, this is easy and only happens
* during adding refcounted flag to the extent, so we don't
- * have a chance of spliting. We just need one record.
+ * have a chance of splitting. We just need one record.
*
* If the refcount rec already exists, that would be a little
* complicated. we may have to:
@@ -2616,11 +2611,11 @@ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
/*
* Calculate out the start and number of virtual clusters we need to CoW.
*
- * cpos is vitual start cluster position we want to do CoW in a
+ * cpos is virtual start cluster position we want to do CoW in a
* file and write_len is the cluster length.
* max_cpos is the place where we want to stop CoW intentionally.
*
- * Normal we will start CoW from the beginning of extent record cotaining cpos.
+ * Normal we will start CoW from the beginning of extent record containing cpos.
* We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we
* get good I/O from the resulting extent tree.
*/
@@ -2908,7 +2903,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
int ret = 0, partial;
struct super_block *sb = inode->i_sb;
u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
- struct page *page;
pgoff_t page_index;
unsigned int from, to;
loff_t offset, end, map_end;
@@ -2927,6 +2921,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
end = i_size_read(inode);
while (offset < end) {
+ struct folio *folio;
page_index = offset >> PAGE_SHIFT;
map_end = ((loff_t)page_index + 1) << PAGE_SHIFT;
if (map_end > end)
@@ -2939,9 +2934,10 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
to = map_end & (PAGE_SIZE - 1);
retry:
- page = find_or_create_page(mapping, page_index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
+ folio = __filemap_get_folio(mapping, page_index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
mlog_errno(ret);
break;
}
@@ -2951,9 +2947,9 @@ retry:
* page, so write it back.
*/
if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) {
- if (PageDirty(page)) {
- unlock_page(page);
- put_page(page);
+ if (folio_test_dirty(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
ret = filemap_write_and_wait_range(mapping,
offset, map_end - 1);
@@ -2961,9 +2957,7 @@ retry:
}
}
- if (!PageUptodate(page)) {
- struct folio *folio = page_folio(page);
-
+ if (!folio_test_uptodate(folio)) {
ret = block_read_full_folio(folio, ocfs2_get_block);
if (ret) {
mlog_errno(ret);
@@ -2972,8 +2966,8 @@ retry:
folio_lock(folio);
}
- if (page_has_buffers(page)) {
- ret = walk_page_buffers(handle, page_buffers(page),
+ if (folio_buffers(folio)) {
+ ret = walk_page_buffers(handle, folio_buffers(folio),
from, to, &partial,
ocfs2_clear_cow_buffer);
if (ret) {
@@ -2982,14 +2976,12 @@ retry:
}
}
- ocfs2_map_and_dirty_page(inode,
- handle, from, to,
- page, 0, &new_block);
- mark_page_accessed(page);
+ ocfs2_map_and_dirty_folio(inode, handle, from, to,
+ folio, 0, &new_block);
+ folio_mark_accessed(folio);
unlock:
- unlock_page(page);
- put_page(page);
- page = NULL;
+ folio_unlock(folio);
+ folio_put(folio);
offset = map_end;
if (ret)
break;
@@ -3751,8 +3743,8 @@ static int ocfs2_change_ctime(struct inode *inode,
}
inode_set_ctime_current(inode);
- di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
+ di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
ocfs2_journal_dirty(handle, di_bh);
@@ -4075,10 +4067,10 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
*/
inode_set_ctime_current(t_inode);
- di->i_ctime = cpu_to_le64(inode_get_ctime(t_inode).tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(t_inode).tv_nsec);
+ di->i_ctime = cpu_to_le64(inode_get_ctime_sec(t_inode));
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(t_inode));
- t_inode->i_mtime = s_inode->i_mtime;
+ inode_set_mtime_to_ts(t_inode, inode_get_mtime(s_inode));
di->i_mtime = s_di->i_mtime;
di->i_mtime_nsec = s_di->i_mtime_nsec;
}
@@ -4155,8 +4147,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
int ret;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *new_bh = NULL;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
@@ -4182,6 +4175,26 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out_unlock;
}
+ if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) &&
+ (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ struct ocfs2_inode_info *new_oi = OCFS2_I(new_inode);
+
+ if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(new_inode))) {
+ struct ocfs2_dinode *new_di = (struct ocfs2_dinode *)new_bh->b_data;
+ struct ocfs2_dinode *old_di = (struct ocfs2_dinode *)old_bh->b_data;
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ int inline_size = le16_to_cpu(old_di->i_xattr_inline_size);
+
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
+ }
+
ret = ocfs2_create_reflink_node(inode, old_bh,
new_inode, new_bh, preserve);
if (ret) {
@@ -4189,7 +4202,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto inode_unlock;
}
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+ if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
ret = ocfs2_reflink_xattrs(inode, old_bh,
new_inode, new_bh,
preserve);
@@ -4406,7 +4419,7 @@ int ocfs2_reflink_ioctl(struct inode *inode,
return error;
}
- new_dentry = user_path_create(AT_FDCWD, newname, &new_path, 0);
+ new_dentry = start_creating_user_path(AT_FDCWD, newname, &new_path, 0);
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry)) {
mlog_errno(error);
@@ -4423,7 +4436,7 @@ int ocfs2_reflink_ioctl(struct inode *inode,
d_inode(new_path.dentry),
new_dentry, preserve);
out_dput:
- done_path_create(&new_path, new_dentry);
+ end_creating_path(&new_path, new_dentry);
out:
path_put(&old_path);
@@ -4456,7 +4469,7 @@ int ocfs2_reflink_update_dest(struct inode *dest,
if (newlen > i_size_read(dest))
i_size_write(dest, newlen);
spin_unlock(&OCFS2_I(dest)->ip_lock);
- dest->i_mtime = inode_set_ctime_current(dest);
+ inode_set_mtime_to_ts(dest, inode_set_ctime_current(dest));
ret = ocfs2_mark_inode_dirty(handle, dest, d_bh);
if (ret) {
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index a9d1296d736d..1fe61974d9f0 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -414,7 +414,7 @@ static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
start = search_start;
while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len,
- start)) != -1) {
+ start)) < resmap->m_bitmap_len) {
/* Search reached end of the region */
if (offset >= (search_start + search_len))
break;
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
index ec8101ef5717..4fce17180342 100644
--- a/fs/ocfs2/reservations.h
+++ b/fs/ocfs2/reservations.h
@@ -31,7 +31,7 @@ struct ocfs2_alloc_reservation {
#define OCFS2_RESV_FLAG_INUSE 0x01 /* Set when r_node is part of a btree */
#define OCFS2_RESV_FLAG_TMP 0x02 /* Temporary reservation, will be
- * destroyed immedately after use */
+ * destroyed immediately after use */
#define OCFS2_RESV_FLAG_DIR 0x04 /* Reservation is for an unindexed
* directory btree */
@@ -125,7 +125,7 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
/**
* ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
* @resmap: reservations bitmap
- * @resv: optional reservation to recalulate based on new bitmap
+ * @resv: optional reservation to recalculate based on new bitmap
* @cstart: start of allocation in clusters
* @clen: end of allocation in clusters.
*
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index d65d43c61857..b0733c08ed13 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -91,6 +91,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
u16 cl_bpc = le16_to_cpu(cl->cl_bpc);
u16 cl_cpg = le16_to_cpu(cl->cl_cpg);
u16 old_bg_clusters;
+ u16 contig_bits;
+ __le16 old_bg_contig_free_bits;
trace_ocfs2_update_last_group_and_inode(new_clusters,
first_new_cluster);
@@ -122,6 +124,11 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
}
+ contig_bits = ocfs2_find_max_contig_free_bits(group->bg_bitmap,
+ le16_to_cpu(group->bg_bits), 0);
+ old_bg_contig_free_bits = group->bg_contig_free_bits;
+ group->bg_contig_free_bits = cpu_to_le16(contig_bits);
+
ocfs2_journal_dirty(handle, group_bh);
/* update the inode accordingly. */
@@ -160,6 +167,7 @@ out_rollback:
le16_add_cpu(&group->bg_free_bits_count, backups);
le16_add_cpu(&group->bg_bits, -1 * num_bits);
le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits);
+ group->bg_contig_free_bits = old_bg_contig_free_bits;
}
out:
if (ret)
@@ -566,6 +574,8 @@ out_commit:
ocfs2_commit_trans(osb, handle);
out_free_group_bh:
+ if (ret < 0)
+ ocfs2_remove_from_cache(INODE_CACHE(inode), group_bh);
brelse(group_bh);
out_unlock:
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index da7718cef735..e544c704b583 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -37,7 +37,7 @@ struct ocfs2_slot_info {
unsigned int si_blocks;
struct buffer_head **si_bh;
unsigned int si_num_slots;
- struct ocfs2_slot si_slots[];
+ struct ocfs2_slot si_slots[] __counted_by(si_num_slots);
};
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index c973c03f6fd8..f58e891aa2da 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -227,7 +227,7 @@ static int o2cb_dlm_lock_status(struct ocfs2_dlm_lksb *lksb)
}
/*
- * o2dlm aways has a "valid" LVB. If the dlm loses track of the LVB
+ * o2dlm always has a "valid" LVB. If the dlm loses track of the LVB
* contents, it will zero out the LVB. Thus the caller can always trust
* the contents.
*/
@@ -404,7 +404,7 @@ static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn,
return 0;
}
-static struct ocfs2_stack_operations o2cb_stack_ops = {
+static const struct ocfs2_stack_operations o2cb_stack_ops = {
.connect = o2cb_cluster_connect,
.disconnect = o2cb_cluster_disconnect,
.this_node = o2cb_cluster_this_node,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 05d4414d0c33..be0a5758bd40 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -360,7 +360,6 @@ static int ocfs2_control_do_setnode_msg(struct file *file,
struct ocfs2_control_message_setn *msg)
{
long nodenum;
- char *ptr = NULL;
struct ocfs2_control_private *p = file->private_data;
if (ocfs2_control_get_handshake_state(file) !=
@@ -375,8 +374,7 @@ static int ocfs2_control_do_setnode_msg(struct file *file,
return -EINVAL;
msg->space = msg->newline = '\0';
- nodenum = simple_strtol(msg->nodestr, &ptr, 16);
- if (!ptr || *ptr)
+ if (kstrtol(msg->nodestr, 16, &nodenum))
return -EINVAL;
if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
@@ -391,7 +389,6 @@ static int ocfs2_control_do_setversion_msg(struct file *file,
struct ocfs2_control_message_setv *msg)
{
long major, minor;
- char *ptr = NULL;
struct ocfs2_control_private *p = file->private_data;
struct ocfs2_protocol_version *max =
&ocfs2_user_plugin.sp_max_proto;
@@ -409,11 +406,9 @@ static int ocfs2_control_do_setversion_msg(struct file *file,
return -EINVAL;
msg->space1 = msg->space2 = msg->newline = '\0';
- major = simple_strtol(msg->major, &ptr, 16);
- if (!ptr || *ptr)
+ if (kstrtol(msg->major, 16, &major))
return -EINVAL;
- minor = simple_strtol(msg->minor, &ptr, 16);
- if (!ptr || *ptr)
+ if (kstrtol(msg->minor, 16, &minor))
return -EINVAL;
/*
@@ -441,7 +436,6 @@ static int ocfs2_control_do_down_msg(struct file *file,
struct ocfs2_control_message_down *msg)
{
long nodenum;
- char *p = NULL;
if (ocfs2_control_get_handshake_state(file) !=
OCFS2_CONTROL_HANDSHAKE_VALID)
@@ -456,8 +450,7 @@ static int ocfs2_control_do_down_msg(struct file *file,
return -EINVAL;
msg->space1 = msg->space2 = msg->newline = '\0';
- nodenum = simple_strtol(msg->nodestr, &p, 16);
- if (!p || *p)
+ if (kstrtol(msg->nodestr, 16, &nodenum))
return -EINVAL;
if ((nodenum == LONG_MIN) || (nodenum == LONG_MAX) ||
@@ -738,20 +731,13 @@ static int user_plock(struct ocfs2_cluster_connection *conn,
*
* Internally, fs/dlm will pass these to a misc device, which
* a userspace daemon will read and write to.
- *
- * For now, cancel requests (which happen internally only),
- * are turned into unlocks. Most of this function taken from
- * gfs2_lock.
*/
- if (cmd == F_CANCELLK) {
- cmd = F_SETLK;
- fl->fl_type = F_UNLCK;
- }
-
- if (IS_GETLK(cmd))
+ if (cmd == F_CANCELLK)
+ return dlm_posix_cancel(conn->cc_lockspace, ino, file, fl);
+ else if (IS_GETLK(cmd))
return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
- else if (fl->fl_type == F_UNLCK)
+ else if (lock_is_unlock(fl))
return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
else
return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
@@ -966,7 +952,7 @@ static const struct dlm_lockspace_ops ocfs2_ls_ops = {
static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
{
version_unlock(conn);
- dlm_release_lockspace(conn->cc_lockspace, 2);
+ dlm_release_lockspace(conn->cc_lockspace, DLM_RELEASE_NORMAL);
conn->cc_lockspace = NULL;
ocfs2_live_connection_drop(conn->cc_private);
conn->cc_private = NULL;
@@ -1025,6 +1011,7 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
printk(KERN_ERR "ocfs2: Could not determine"
" locking version\n");
user_cluster_disconnect(conn);
+ lc = NULL;
goto out;
}
wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));
@@ -1072,7 +1059,7 @@ static int user_cluster_this_node(struct ocfs2_cluster_connection *conn,
return 0;
}
-static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
+static const struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
.connect = user_cluster_connect,
.disconnect = user_cluster_disconnect,
.this_node = user_cluster_this_node,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index a8d5ca98fa57..a28c127b9934 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -650,7 +650,7 @@ error:
* and easier to preserve the name.
*/
-static struct ctl_table ocfs2_nm_table[] = {
+static const struct ctl_table ocfs2_nm_table[] = {
{
.procname = "hb_ctl_path",
.data = ocfs2_hb_ctl_path,
@@ -658,7 +658,6 @@ static struct ctl_table ocfs2_nm_table[] = {
.mode = 0644,
.proc_handler = proc_dostring,
},
- { }
};
static struct ctl_table_header *ocfs2_table_header;
@@ -692,8 +691,7 @@ static void __exit ocfs2_stack_glue_exit(void)
memset(&locking_max_version, 0,
sizeof(struct ocfs2_protocol_version));
ocfs2_sysfs_exit();
- if (ocfs2_table_header)
- unregister_sysctl_table(ocfs2_table_header);
+ unregister_sysctl_table(ocfs2_table_header);
}
MODULE_AUTHOR("Oracle");
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 3636847fae19..5486a6dce70a 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -210,7 +210,7 @@ struct ocfs2_stack_operations {
struct file_lock *fl);
/*
- * This is an optoinal debugging hook. If provided, the
+ * This is an optional debugging hook. If provided, the
* stack can dump debugging information about this lock.
*/
void (*dump_lksb)(struct ocfs2_dlm_lksb *lksb);
@@ -223,7 +223,7 @@ struct ocfs2_stack_operations {
*/
struct ocfs2_stack_plugin {
char *sp_name;
- struct ocfs2_stack_operations *sp_ops;
+ const struct ocfs2_stack_operations *sp_ops;
struct module *sp_owner;
/* These are managed by the stackglue code. */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 166c8918c825..6ac4dcd54588 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -50,6 +50,10 @@ struct ocfs2_suballoc_result {
u64 sr_blkno; /* The first allocated block */
unsigned int sr_bit_offset; /* The bit in the bg */
unsigned int sr_bits; /* How many bits we claimed */
+ unsigned int sr_max_contig_bits; /* The length for contiguous
+ * free bits, only available
+ * for cluster group
+ */
};
static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
@@ -694,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
ac, cl);
- if (PTR_ERR(bg_bh) == -ENOSPC)
+ if (PTR_ERR(bg_bh) == -ENOSPC) {
+ ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
bg_bh = ocfs2_block_group_alloc_discontig(handle,
alloc_inode,
ac, cl);
+ }
if (IS_ERR(bg_bh)) {
status = PTR_ERR(bg_bh);
bg_bh = NULL;
@@ -1272,6 +1278,26 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
return ret;
}
+u16 ocfs2_find_max_contig_free_bits(void *bitmap,
+ u16 total_bits, u16 start)
+{
+ u16 offset, free_bits;
+ u16 contig_bits = 0;
+
+ while (start < total_bits) {
+ offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start);
+ if (offset == total_bits)
+ break;
+
+ start = ocfs2_find_next_bit(bitmap, total_bits, offset);
+ free_bits = start - offset;
+ if (contig_bits < free_bits)
+ contig_bits = free_bits;
+ }
+
+ return contig_bits;
+}
+
static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
struct buffer_head *bg_bh,
unsigned int bits_wanted,
@@ -1280,6 +1306,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
{
void *bitmap;
u16 best_offset, best_size;
+ u16 prev_best_size = 0;
int offset, start, found, status = 0;
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
@@ -1290,10 +1317,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
found = start = best_offset = best_size = 0;
bitmap = bg->bg_bitmap;
- while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
- if (offset == total_bits)
- break;
-
+ while ((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) <
+ total_bits) {
if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
/* We found a zero, but we can't use it as it
* hasn't been put to disk yet! */
@@ -1308,6 +1333,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
/* got a zero after some ones */
found = 1;
start = offset + 1;
+ prev_best_size = best_size;
}
if (found > best_size) {
best_size = found;
@@ -1320,6 +1346,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
}
}
+ /* best_size will be allocated, we save prev_best_size */
+ res->sr_max_contig_bits = prev_best_size;
if (best_size) {
res->sr_bit_offset = best_offset;
res->sr_bits = best_size;
@@ -1337,11 +1365,16 @@ int ocfs2_block_group_set_bits(handle_t *handle,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
unsigned int bit_off,
- unsigned int num_bits)
+ unsigned int num_bits,
+ unsigned int max_contig_bits,
+ int fastpath)
{
int status;
void *bitmap = bg->bg_bitmap;
int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
+ unsigned int start = bit_off + num_bits;
+ u16 contig_bits;
+ struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
/* All callers get the descriptor via
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
@@ -1373,6 +1406,29 @@ int ocfs2_block_group_set_bits(handle_t *handle,
while(num_bits--)
ocfs2_set_bit(bit_off++, bitmap);
+ /*
+ * this is optimize path, caller set old contig value
+ * in max_contig_bits to bypass finding action.
+ */
+ if (fastpath) {
+ bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
+ } else if (ocfs2_is_cluster_bitmap(alloc_inode)) {
+ /*
+ * Usually, the block group bitmap allocates only 1 bit
+ * at a time, while the cluster group allocates n bits
+ * each time. Therefore, we only save the contig bits for
+ * the cluster group.
+ */
+ contig_bits = ocfs2_find_max_contig_free_bits(bitmap,
+ le16_to_cpu(bg->bg_bits), start);
+ if (contig_bits > max_contig_bits)
+ max_contig_bits = contig_bits;
+ bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
+ ocfs2_local_alloc_seen_free_bits(osb, max_contig_bits);
+ } else {
+ bg->bg_contig_free_bits = 0;
+ }
+
ocfs2_journal_dirty(handle, group_bh);
bail:
@@ -1486,7 +1542,12 @@ static int ocfs2_cluster_group_search(struct inode *inode,
BUG_ON(!ocfs2_is_cluster_bitmap(inode));
- if (gd->bg_free_bits_count) {
+ if (le16_to_cpu(gd->bg_contig_free_bits) &&
+ le16_to_cpu(gd->bg_contig_free_bits) < bits_wanted)
+ return -ENOSPC;
+
+ /* ->bg_contig_free_bits may un-initialized, so compare again */
+ if (le16_to_cpu(gd->bg_free_bits_count) >= bits_wanted) {
max_bits = le16_to_cpu(gd->bg_bits);
/* Tail groups in cluster bitmaps which aren't cpg
@@ -1530,13 +1591,6 @@ static int ocfs2_cluster_group_search(struct inode *inode,
* of bits. */
if (min_bits <= res->sr_bits)
search = 0; /* success */
- else if (res->sr_bits) {
- /*
- * Don't show bits which we'll be returning
- * for allocation to the local alloc bitmap.
- */
- ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
- }
}
return search;
@@ -1555,7 +1609,7 @@ static int ocfs2_block_group_search(struct inode *inode,
BUG_ON(min_bits != 1);
BUG_ON(ocfs2_is_cluster_bitmap(inode));
- if (bg->bg_free_bits_count) {
+ if (le16_to_cpu(bg->bg_free_bits_count) >= bits_wanted) {
ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
group_bh, bits_wanted,
le16_to_cpu(bg->bg_bits),
@@ -1715,7 +1769,8 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
}
ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
- res->sr_bit_offset, res->sr_bits);
+ res->sr_bit_offset, res->sr_bits,
+ res->sr_max_contig_bits, 0);
if (ret < 0) {
ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh,
res->sr_bits,
@@ -1741,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
{
int status;
u16 chain;
+ u32 contig_bits;
u64 next_group;
struct inode *alloc_inode = ac->ac_inode;
struct buffer_head *group_bh = NULL;
@@ -1766,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
status = -ENOSPC;
/* for now, the chain search is a bit simplistic. We just use
* the 1st group with any empty bits. */
- while ((status = ac->ac_group_search(alloc_inode, group_bh,
- bits_wanted, min_bits,
- ac->ac_max_block,
- res)) == -ENOSPC) {
+ while (1) {
+ if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) {
+ contig_bits = le16_to_cpu(bg->bg_contig_free_bits);
+ if (!contig_bits)
+ contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap,
+ le16_to_cpu(bg->bg_bits), 0);
+ if (bits_wanted > contig_bits && contig_bits >= min_bits)
+ bits_wanted = contig_bits;
+ }
+
+ status = ac->ac_group_search(alloc_inode, group_bh,
+ bits_wanted, min_bits,
+ ac->ac_max_block, res);
+ if (status != -ENOSPC)
+ break;
if (!bg->bg_next_group)
break;
@@ -1849,7 +1916,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
bg,
group_bh,
res->sr_bit_offset,
- res->sr_bits);
+ res->sr_bits,
+ res->sr_max_contig_bits,
+ 0);
if (status < 0) {
ocfs2_rollback_alloc_dinode_counts(alloc_inode,
ac->ac_bh, res->sr_bits, chain);
@@ -1927,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
victim = ocfs2_find_victim_chain(cl);
ac->ac_chain = victim;
+search:
status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
res, &bits_left);
if (!status) {
@@ -1951,7 +2021,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
if (i == victim)
continue;
- if (!cl->cl_recs[i].c_free)
+ if (le32_to_cpu(cl->cl_recs[i].c_free) < bits_wanted)
continue;
ac->ac_chain = i;
@@ -1967,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
}
}
+ /* Chains can't supply the bits_wanted contiguous space.
+ * We should switch to using every single bit when allocating
+ * from the global bitmap. */
+ if (i == le16_to_cpu(cl->cl_next_free_rec) &&
+ status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) {
+ ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
+ ac->ac_chain = victim;
+ goto search;
+ }
+
set_hint:
if (status != -ENOSPC) {
/* If the next search of this group is not likely to
@@ -2163,7 +2243,9 @@ int ocfs2_claim_new_inode_at_loc(handle_t *handle,
bg,
bg_bh,
res->sr_bit_offset,
- res->sr_bits);
+ res->sr_bits,
+ res->sr_max_contig_bits,
+ 0);
if (ret < 0) {
ocfs2_rollback_alloc_dinode_counts(ac->ac_inode,
ac->ac_bh, res->sr_bits, chain);
@@ -2308,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
- && ac->ac_which != OCFS2_AC_USE_MAIN);
+ && ac->ac_which != OCFS2_AC_USE_MAIN
+ && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
WARN_ON(min_clusters > 1);
@@ -2382,11 +2465,13 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
struct buffer_head *group_bh,
unsigned int bit_off,
unsigned int num_bits,
+ unsigned int max_contig_bits,
void (*undo_fn)(unsigned int bit,
unsigned long *bmap))
{
int status;
unsigned int tmp;
+ u16 contig_bits;
struct ocfs2_group_desc *undo_bg = NULL;
struct journal_head *jh;
@@ -2433,6 +2518,20 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
num_bits);
}
+ /*
+ * TODO: even 'num_bits == 1' (the worst case, release 1 cluster),
+ * we still need to rescan whole bitmap.
+ */
+ if (ocfs2_is_cluster_bitmap(alloc_inode)) {
+ contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap,
+ le16_to_cpu(bg->bg_bits), 0);
+ if (contig_bits > max_contig_bits)
+ max_contig_bits = contig_bits;
+ bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
+ } else {
+ bg->bg_contig_free_bits = 0;
+ }
+
if (undo_fn)
spin_unlock(&jh->b_state_lock);
@@ -2459,6 +2558,7 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
struct ocfs2_chain_list *cl = &fe->id2.i_chain;
struct buffer_head *group_bh = NULL;
struct ocfs2_group_desc *group;
+ __le16 old_bg_contig_free_bits = 0;
/* The alloc_bh comes from ocfs2_free_dinode() or
* ocfs2_free_clusters(). The callers have all locked the
@@ -2483,9 +2583,11 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
+ if (ocfs2_is_cluster_bitmap(alloc_inode))
+ old_bg_contig_free_bits = group->bg_contig_free_bits;
status = ocfs2_block_group_clear_bits(handle, alloc_inode,
group, group_bh,
- start_bit, count, undo_fn);
+ start_bit, count, 0, undo_fn);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -2496,7 +2598,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
if (status < 0) {
mlog_errno(status);
ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
- start_bit, count);
+ start_bit, count,
+ le16_to_cpu(old_bg_contig_free_bits), 1);
goto bail;
}
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 9c74eace3adc..bcf2ed4a8631 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -29,6 +29,7 @@ struct ocfs2_alloc_context {
#define OCFS2_AC_USE_MAIN 2
#define OCFS2_AC_USE_INODE 3
#define OCFS2_AC_USE_META 4
+#define OCFS2_AC_USE_MAIN_DISCONTIG 5
u32 ac_which;
/* these are used by the chain search */
@@ -79,12 +80,16 @@ void ocfs2_rollback_alloc_dinode_counts(struct inode *inode,
struct buffer_head *di_bh,
u32 num_bits,
u16 chain);
+u16 ocfs2_find_max_contig_free_bits(void *bitmap,
+ u16 total_bits, u16 start);
int ocfs2_block_group_set_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
unsigned int bit_off,
- unsigned int num_bits);
+ unsigned int num_bits,
+ unsigned int max_contig_bits,
+ int fastpath);
int ocfs2_claim_metadata(handle_t *handle,
struct ocfs2_alloc_context *ac,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 6b906424902b..2c7ba1480f7a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -19,10 +19,10 @@
#include <linux/blkdev.h>
#include <linux/socket.h>
#include <linux/inet.h>
-#include <linux/parser.h>
+#include <linux/fs_parser.h>
+#include <linux/fs_context.h>
#include <linux/crc32.h>
#include <linux/debugfs.h>
-#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/quotaops.h>
#include <linux/signal.h>
@@ -80,17 +80,15 @@ struct mount_options
unsigned int resv_level;
int dir_resv_level;
char cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
+ bool user_stack;
};
-static int ocfs2_parse_options(struct super_block *sb, char *options,
- struct mount_options *mopt,
- int is_remount);
+static int ocfs2_parse_param(struct fs_context *fc, struct fs_parameter *param);
static int ocfs2_check_set_options(struct super_block *sb,
struct mount_options *options);
static int ocfs2_show_options(struct seq_file *s, struct dentry *root);
static void ocfs2_put_super(struct super_block *sb);
static int ocfs2_mount_volume(struct super_block *sb);
-static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err);
static int ocfs2_initialize_mem_caches(void);
static void ocfs2_free_mem_caches(void);
@@ -122,7 +120,7 @@ static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
static int ocfs2_enable_quotas(struct ocfs2_super *osb);
static void ocfs2_disable_quotas(struct ocfs2_super *osb);
-static struct dquot **ocfs2_get_dquots(struct inode *inode)
+static struct dquot __rcu **ocfs2_get_dquots(struct inode *inode)
{
return OCFS2_I(inode)->i_dquot;
}
@@ -131,11 +129,10 @@ static const struct super_operations ocfs2_sops = {
.statfs = ocfs2_statfs,
.alloc_inode = ocfs2_alloc_inode,
.free_inode = ocfs2_free_inode,
- .drop_inode = ocfs2_drop_inode,
+ .drop_inode = inode_just_drop,
.evict_inode = ocfs2_evict_inode,
.sync_fs = ocfs2_sync_fs,
.put_super = ocfs2_put_super,
- .remount_fs = ocfs2_remount,
.show_options = ocfs2_show_options,
.quota_read = ocfs2_quota_read,
.quota_write = ocfs2_quota_write,
@@ -144,15 +141,10 @@ static const struct super_operations ocfs2_sops = {
enum {
Opt_barrier,
- Opt_err_panic,
- Opt_err_ro,
+ Opt_errors,
Opt_intr,
- Opt_nointr,
- Opt_hb_none,
- Opt_hb_local,
- Opt_hb_global,
- Opt_data_ordered,
- Opt_data_writeback,
+ Opt_heartbeat,
+ Opt_data,
Opt_atime_quantum,
Opt_slot,
Opt_commit,
@@ -160,52 +152,64 @@ enum {
Opt_localflocks,
Opt_stack,
Opt_user_xattr,
- Opt_nouser_xattr,
Opt_inode64,
Opt_acl,
- Opt_noacl,
Opt_usrquota,
Opt_grpquota,
- Opt_coherency_buffered,
- Opt_coherency_full,
+ Opt_coherency,
Opt_resv_level,
Opt_dir_resv_level,
Opt_journal_async_commit,
- Opt_err_cont,
- Opt_err,
};
-static const match_table_t tokens = {
- {Opt_barrier, "barrier=%u"},
- {Opt_err_panic, "errors=panic"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_intr, "intr"},
- {Opt_nointr, "nointr"},
- {Opt_hb_none, OCFS2_HB_NONE},
- {Opt_hb_local, OCFS2_HB_LOCAL},
- {Opt_hb_global, OCFS2_HB_GLOBAL},
- {Opt_data_ordered, "data=ordered"},
- {Opt_data_writeback, "data=writeback"},
- {Opt_atime_quantum, "atime_quantum=%u"},
- {Opt_slot, "preferred_slot=%u"},
- {Opt_commit, "commit=%u"},
- {Opt_localalloc, "localalloc=%d"},
- {Opt_localflocks, "localflocks"},
- {Opt_stack, "cluster_stack=%s"},
- {Opt_user_xattr, "user_xattr"},
- {Opt_nouser_xattr, "nouser_xattr"},
- {Opt_inode64, "inode64"},
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
- {Opt_usrquota, "usrquota"},
- {Opt_grpquota, "grpquota"},
- {Opt_coherency_buffered, "coherency=buffered"},
- {Opt_coherency_full, "coherency=full"},
- {Opt_resv_level, "resv_level=%u"},
- {Opt_dir_resv_level, "dir_resv_level=%u"},
- {Opt_journal_async_commit, "journal_async_commit"},
- {Opt_err_cont, "errors=continue"},
- {Opt_err, NULL}
+static const struct constant_table ocfs2_param_errors[] = {
+ {"panic", OCFS2_MOUNT_ERRORS_PANIC},
+ {"remount-ro", OCFS2_MOUNT_ERRORS_ROFS},
+ {"continue", OCFS2_MOUNT_ERRORS_CONT},
+ {}
+};
+
+static const struct constant_table ocfs2_param_heartbeat[] = {
+ {"local", OCFS2_MOUNT_HB_LOCAL},
+ {"none", OCFS2_MOUNT_HB_NONE},
+ {"global", OCFS2_MOUNT_HB_GLOBAL},
+ {}
+};
+
+static const struct constant_table ocfs2_param_data[] = {
+ {"writeback", OCFS2_MOUNT_DATA_WRITEBACK},
+ {"ordered", 0},
+ {}
+};
+
+static const struct constant_table ocfs2_param_coherency[] = {
+ {"buffered", OCFS2_MOUNT_COHERENCY_BUFFERED},
+ {"full", 0},
+ {}
+};
+
+static const struct fs_parameter_spec ocfs2_param_spec[] = {
+ fsparam_u32 ("barrier", Opt_barrier),
+ fsparam_enum ("errors", Opt_errors, ocfs2_param_errors),
+ fsparam_flag_no ("intr", Opt_intr),
+ fsparam_enum ("heartbeat", Opt_heartbeat, ocfs2_param_heartbeat),
+ fsparam_enum ("data", Opt_data, ocfs2_param_data),
+ fsparam_u32 ("atime_quantum", Opt_atime_quantum),
+ fsparam_u32 ("preferred_slot", Opt_slot),
+ fsparam_u32 ("commit", Opt_commit),
+ fsparam_s32 ("localalloc", Opt_localalloc),
+ fsparam_flag ("localflocks", Opt_localflocks),
+ fsparam_string ("cluster_stack", Opt_stack),
+ fsparam_flag_no ("user_xattr", Opt_user_xattr),
+ fsparam_flag ("inode64", Opt_inode64),
+ fsparam_flag_no ("acl", Opt_acl),
+ fsparam_flag ("usrquota", Opt_usrquota),
+ fsparam_flag ("grpquota", Opt_grpquota),
+ fsparam_enum ("coherency", Opt_coherency, ocfs2_param_coherency),
+ fsparam_u32 ("resv_level", Opt_resv_level),
+ fsparam_u32 ("dir_resv_level", Opt_dir_resv_level),
+ fsparam_flag ("journal_async_commit", Opt_journal_async_commit),
+ {}
};
#ifdef CONFIG_DEBUG_FS
@@ -600,32 +604,32 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
return (((unsigned long long)bytes) << bitshift) - trim;
}
-static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
+static int ocfs2_reconfigure(struct fs_context *fc)
{
int incompat_features;
int ret = 0;
- struct mount_options parsed_options;
+ struct mount_options *parsed_options = fc->fs_private;
+ struct super_block *sb = fc->root->d_sb;
struct ocfs2_super *osb = OCFS2_SB(sb);
u32 tmp;
sync_filesystem(sb);
- if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
- !ocfs2_check_set_options(sb, &parsed_options)) {
+ if (!ocfs2_check_set_options(sb, parsed_options)) {
ret = -EINVAL;
goto out;
}
tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE;
- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+ if ((osb->s_mount_opt & tmp) != (parsed_options->mount_opt & tmp)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n");
goto out;
}
if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) !=
- (parsed_options.mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) {
+ (parsed_options->mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot change data mode on remount\n");
goto out;
@@ -634,16 +638,16 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
/* Probably don't want this on remount; it might
* mess with other nodes */
if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) &&
- (parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) {
+ (parsed_options->mount_opt & OCFS2_MOUNT_INODE64)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot enable inode64 on remount\n");
goto out;
}
/* We're going to/from readonly mode. */
- if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
+ if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
/* Disable quota accounting before remounting RO */
- if (*flags & SB_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
ret = ocfs2_susp_quotas(osb, 0);
if (ret < 0)
goto out;
@@ -657,7 +661,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto unlock_osb;
}
- if (*flags & SB_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
sb->s_flags |= SB_RDONLY;
osb->osb_flags |= OCFS2_OSB_SOFT_RO;
} else {
@@ -678,11 +682,11 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags &= ~SB_RDONLY;
osb->osb_flags &= ~OCFS2_OSB_SOFT_RO;
}
- trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags);
+ trace_ocfs2_remount(sb->s_flags, osb->osb_flags, fc->sb_flags);
unlock_osb:
spin_unlock(&osb->osb_lock);
/* Enable quota accounting after remounting RW */
- if (!ret && !(*flags & SB_RDONLY)) {
+ if (!ret && !(fc->sb_flags & SB_RDONLY)) {
if (sb_any_quota_suspended(sb))
ret = ocfs2_susp_quotas(osb, 1);
else
@@ -701,11 +705,11 @@ unlock_osb:
if (!ret) {
/* Only save off the new mount options in case of a successful
* remount. */
- osb->s_mount_opt = parsed_options.mount_opt;
- osb->s_atime_quantum = parsed_options.atime_quantum;
- osb->preferred_slot = parsed_options.slot;
- if (parsed_options.commit_interval)
- osb->osb_commit_interval = parsed_options.commit_interval;
+ osb->s_mount_opt = parsed_options->mount_opt;
+ osb->s_atime_quantum = parsed_options->atime_quantum;
+ osb->preferred_slot = parsed_options->slot;
+ if (parsed_options->commit_interval)
+ osb->osb_commit_interval = parsed_options->commit_interval;
if (!ocfs2_is_hard_readonly(osb))
ocfs2_set_journal_params(osb);
@@ -966,23 +970,18 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
}
}
-static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+static int ocfs2_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct dentry *root;
int status, sector_size;
- struct mount_options parsed_options;
+ struct mount_options *parsed_options = fc->fs_private;
struct inode *inode = NULL;
struct ocfs2_super *osb = NULL;
struct buffer_head *bh = NULL;
char nodestr[12];
struct ocfs2_blockcheck_stats stats;
- trace_ocfs2_fill_super(sb, data, silent);
-
- if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) {
- status = -EINVAL;
- goto out;
- }
+ trace_ocfs2_fill_super(sb, fc, fc->sb_flags & SB_SILENT);
/* probe for superblock */
status = ocfs2_sb_probe(sb, &bh, &sector_size, &stats);
@@ -999,24 +998,24 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb = OCFS2_SB(sb);
- if (!ocfs2_check_set_options(sb, &parsed_options)) {
+ if (!ocfs2_check_set_options(sb, parsed_options)) {
status = -EINVAL;
goto out_super;
}
- osb->s_mount_opt = parsed_options.mount_opt;
- osb->s_atime_quantum = parsed_options.atime_quantum;
- osb->preferred_slot = parsed_options.slot;
- osb->osb_commit_interval = parsed_options.commit_interval;
+ osb->s_mount_opt = parsed_options->mount_opt;
+ osb->s_atime_quantum = parsed_options->atime_quantum;
+ osb->preferred_slot = parsed_options->slot;
+ osb->osb_commit_interval = parsed_options->commit_interval;
- ocfs2_la_set_sizes(osb, parsed_options.localalloc_opt);
- osb->osb_resv_level = parsed_options.resv_level;
- osb->osb_dir_resv_level = parsed_options.resv_level;
- if (parsed_options.dir_resv_level == -1)
- osb->osb_dir_resv_level = parsed_options.resv_level;
+ ocfs2_la_set_sizes(osb, parsed_options->localalloc_opt);
+ osb->osb_resv_level = parsed_options->resv_level;
+ osb->osb_dir_resv_level = parsed_options->resv_level;
+ if (parsed_options->dir_resv_level == -1)
+ osb->osb_dir_resv_level = parsed_options->resv_level;
else
- osb->osb_dir_resv_level = parsed_options.dir_resv_level;
+ osb->osb_dir_resv_level = parsed_options->dir_resv_level;
- status = ocfs2_verify_userspace_stack(osb, &parsed_options);
+ status = ocfs2_verify_userspace_stack(osb, parsed_options);
if (status)
goto out_super;
@@ -1075,9 +1074,11 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root,
osb, &ocfs2_osb_debug_fops);
- if (ocfs2_meta_ecc(osb))
+ if (ocfs2_meta_ecc(osb)) {
+ ocfs2_initialize_journal_triggers(sb, osb->s_journal_triggers);
ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats,
osb->osb_debug_root);
+ }
status = ocfs2_mount_volume(sb);
if (status < 0)
@@ -1178,27 +1179,72 @@ out:
return status;
}
-static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
- int flags,
- const char *dev_name,
- void *data)
+static int ocfs2_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, ocfs2_fill_super);
+}
+
+static void ocfs2_free_fc(struct fs_context *fc)
+{
+ kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations ocfs2_context_ops = {
+ .parse_param = ocfs2_parse_param,
+ .get_tree = ocfs2_get_tree,
+ .reconfigure = ocfs2_reconfigure,
+ .free = ocfs2_free_fc,
+};
+
+static int ocfs2_init_fs_context(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
+ struct mount_options *mopt;
+
+ mopt = kzalloc(sizeof(struct mount_options), GFP_KERNEL);
+ if (!mopt)
+ return -EINVAL;
+
+ mopt->commit_interval = 0;
+ mopt->mount_opt = OCFS2_MOUNT_NOINTR;
+ mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
+ mopt->slot = OCFS2_INVALID_SLOT;
+ mopt->localalloc_opt = -1;
+ mopt->cluster_stack[0] = '\0';
+ mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
+ mopt->dir_resv_level = -1;
+
+ fc->fs_private = mopt;
+ fc->ops = &ocfs2_context_ops;
+
+ return 0;
}
static struct file_system_type ocfs2_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2",
- .mount = ocfs2_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
- .next = NULL
+ .next = NULL,
+ .init_fs_context = ocfs2_init_fs_context,
+ .parameters = ocfs2_param_spec,
};
MODULE_ALIAS_FS("ocfs2");
static int ocfs2_check_set_options(struct super_block *sb,
struct mount_options *options)
{
+ if (options->user_stack == 0) {
+ u32 tmp;
+
+ /* Ensure only one heartbeat mode */
+ tmp = options->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
+ OCFS2_MOUNT_HB_GLOBAL |
+ OCFS2_MOUNT_HB_NONE);
+ if (hweight32(tmp) != 1) {
+ mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+ return 0;
+ }
+ }
if (options->mount_opt & OCFS2_MOUNT_USRQUOTA &&
!OCFS2_HAS_RO_COMPAT_FEATURE(sb,
OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
@@ -1230,241 +1276,142 @@ static int ocfs2_check_set_options(struct super_block *sb,
return 1;
}
-static int ocfs2_parse_options(struct super_block *sb,
- char *options,
- struct mount_options *mopt,
- int is_remount)
+static int ocfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- int status, user_stack = 0;
- char *p;
- u32 tmp;
- int token, option;
- substring_t args[MAX_OPT_ARGS];
-
- trace_ocfs2_parse_options(is_remount, options ? options : "(none)");
-
- mopt->commit_interval = 0;
- mopt->mount_opt = OCFS2_MOUNT_NOINTR;
- mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
- mopt->slot = OCFS2_INVALID_SLOT;
- mopt->localalloc_opt = -1;
- mopt->cluster_stack[0] = '\0';
- mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
- mopt->dir_resv_level = -1;
-
- if (!options) {
- status = 1;
- goto bail;
- }
-
- while ((p = strsep(&options, ",")) != NULL) {
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_hb_local:
- mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL;
- break;
- case Opt_hb_none:
- mopt->mount_opt |= OCFS2_MOUNT_HB_NONE;
- break;
- case Opt_hb_global:
- mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL;
- break;
- case Opt_barrier:
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option)
- mopt->mount_opt |= OCFS2_MOUNT_BARRIER;
- else
- mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER;
- break;
- case Opt_intr:
- mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR;
- break;
- case Opt_nointr:
+ struct fs_parse_result result;
+ int opt;
+ struct mount_options *mopt = fc->fs_private;
+ bool is_remount = (fc->purpose & FS_CONTEXT_FOR_RECONFIGURE);
+
+ trace_ocfs2_parse_options(is_remount, param->key);
+
+ opt = fs_parse(fc, ocfs2_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_heartbeat:
+ mopt->mount_opt |= result.uint_32;
+ break;
+ case Opt_barrier:
+ if (result.uint_32)
+ mopt->mount_opt |= OCFS2_MOUNT_BARRIER;
+ else
+ mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER;
+ break;
+ case Opt_intr:
+ if (result.negated)
mopt->mount_opt |= OCFS2_MOUNT_NOINTR;
- break;
- case Opt_err_panic:
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT;
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS;
- mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
- break;
- case Opt_err_ro:
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT;
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
- mopt->mount_opt |= OCFS2_MOUNT_ERRORS_ROFS;
- break;
- case Opt_err_cont:
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS;
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
- mopt->mount_opt |= OCFS2_MOUNT_ERRORS_CONT;
- break;
- case Opt_data_ordered:
- mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
- break;
- case Opt_data_writeback:
- mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
- break;
- case Opt_user_xattr:
- mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR;
- break;
- case Opt_nouser_xattr:
+ else
+ mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR;
+ break;
+ case Opt_errors:
+ mopt->mount_opt &= ~(OCFS2_MOUNT_ERRORS_CONT |
+ OCFS2_MOUNT_ERRORS_ROFS |
+ OCFS2_MOUNT_ERRORS_PANIC);
+ mopt->mount_opt |= result.uint_32;
+ break;
+ case Opt_data:
+ mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
+ mopt->mount_opt |= result.uint_32;
+ break;
+ case Opt_user_xattr:
+ if (result.negated)
mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR;
- break;
- case Opt_atime_quantum:
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option >= 0)
- mopt->atime_quantum = option;
- break;
- case Opt_slot:
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option)
- mopt->slot = (u16)option;
- break;
- case Opt_commit:
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option < 0)
- return 0;
- if (option == 0)
- option = JBD2_DEFAULT_MAX_COMMIT_AGE;
- mopt->commit_interval = HZ * option;
- break;
- case Opt_localalloc:
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option >= 0)
- mopt->localalloc_opt = option;
- break;
- case Opt_localflocks:
- /*
- * Changing this during remount could race
- * flock() requests, or "unbalance" existing
- * ones (e.g., a lock is taken in one mode but
- * dropped in the other). If users care enough
- * to flip locking modes during remount, we
- * could add a "local" flag to individual
- * flock structures for proper tracking of
- * state.
- */
- if (!is_remount)
- mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS;
- break;
- case Opt_stack:
- /* Check both that the option we were passed
- * is of the right length and that it is a proper
- * string of the right length.
- */
- if (((args[0].to - args[0].from) !=
- OCFS2_STACK_LABEL_LEN) ||
- (strnlen(args[0].from,
- OCFS2_STACK_LABEL_LEN) !=
- OCFS2_STACK_LABEL_LEN)) {
- mlog(ML_ERROR,
- "Invalid cluster_stack option\n");
- status = 0;
- goto bail;
- }
- memcpy(mopt->cluster_stack, args[0].from,
- OCFS2_STACK_LABEL_LEN);
- mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
- /*
- * Open code the memcmp here as we don't have
- * an osb to pass to
- * ocfs2_userspace_stack().
- */
- if (memcmp(mopt->cluster_stack,
- OCFS2_CLASSIC_CLUSTER_STACK,
- OCFS2_STACK_LABEL_LEN))
- user_stack = 1;
- break;
- case Opt_inode64:
- mopt->mount_opt |= OCFS2_MOUNT_INODE64;
- break;
- case Opt_usrquota:
- mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
- break;
- case Opt_grpquota:
- mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
- break;
- case Opt_coherency_buffered:
- mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED;
- break;
- case Opt_coherency_full:
- mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED;
- break;
- case Opt_acl:
- mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
- mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
- break;
- case Opt_noacl:
+ else
+ mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR;
+ break;
+ case Opt_atime_quantum:
+ mopt->atime_quantum = result.uint_32;
+ break;
+ case Opt_slot:
+ if (result.uint_32)
+ mopt->slot = (u16)result.uint_32;
+ break;
+ case Opt_commit:
+ if (result.uint_32 == 0)
+ mopt->commit_interval = HZ * JBD2_DEFAULT_MAX_COMMIT_AGE;
+ else
+ mopt->commit_interval = HZ * result.uint_32;
+ break;
+ case Opt_localalloc:
+ if (result.int_32 >= 0)
+ mopt->localalloc_opt = result.int_32;
+ break;
+ case Opt_localflocks:
+ /*
+ * Changing this during remount could race flock() requests, or
+ * "unbalance" existing ones (e.g., a lock is taken in one mode
+ * but dropped in the other). If users care enough to flip
+ * locking modes during remount, we could add a "local" flag to
+ * individual flock structures for proper tracking of state.
+ */
+ if (!is_remount)
+ mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS;
+ break;
+ case Opt_stack:
+ /* Check both that the option we were passed is of the right
+ * length and that it is a proper string of the right length.
+ */
+ if (strlen(param->string) != OCFS2_STACK_LABEL_LEN) {
+ mlog(ML_ERROR, "Invalid cluster_stack option\n");
+ return -EINVAL;
+ }
+ memcpy(mopt->cluster_stack, param->string, OCFS2_STACK_LABEL_LEN);
+ mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+ /*
+ * Open code the memcmp here as we don't have an osb to pass
+ * to ocfs2_userspace_stack().
+ */
+ if (memcmp(mopt->cluster_stack,
+ OCFS2_CLASSIC_CLUSTER_STACK,
+ OCFS2_STACK_LABEL_LEN))
+ mopt->user_stack = 1;
+ break;
+ case Opt_inode64:
+ mopt->mount_opt |= OCFS2_MOUNT_INODE64;
+ break;
+ case Opt_usrquota:
+ mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
+ break;
+ case Opt_grpquota:
+ mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
+ break;
+ case Opt_coherency:
+ mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED;
+ mopt->mount_opt |= result.uint_32;
+ break;
+ case Opt_acl:
+ if (result.negated) {
mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
+ } else {
+ mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
+ mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
+ }
+ break;
+ case Opt_resv_level:
+ if (is_remount)
break;
- case Opt_resv_level:
- if (is_remount)
- break;
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option >= OCFS2_MIN_RESV_LEVEL &&
- option < OCFS2_MAX_RESV_LEVEL)
- mopt->resv_level = option;
- break;
- case Opt_dir_resv_level:
- if (is_remount)
- break;
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option >= OCFS2_MIN_RESV_LEVEL &&
- option < OCFS2_MAX_RESV_LEVEL)
- mopt->dir_resv_level = option;
- break;
- case Opt_journal_async_commit:
- mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
+ if (result.uint_32 >= OCFS2_MIN_RESV_LEVEL &&
+ result.uint_32 < OCFS2_MAX_RESV_LEVEL)
+ mopt->resv_level = result.uint_32;
+ break;
+ case Opt_dir_resv_level:
+ if (is_remount)
break;
- default:
- mlog(ML_ERROR,
- "Unrecognized mount option \"%s\" "
- "or missing value\n", p);
- status = 0;
- goto bail;
- }
- }
-
- if (user_stack == 0) {
- /* Ensure only one heartbeat mode */
- tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
- OCFS2_MOUNT_HB_GLOBAL |
- OCFS2_MOUNT_HB_NONE);
- if (hweight32(tmp) != 1) {
- mlog(ML_ERROR, "Invalid heartbeat mount options\n");
- status = 0;
- goto bail;
- }
+ if (result.uint_32 >= OCFS2_MIN_RESV_LEVEL &&
+ result.uint_32 < OCFS2_MAX_RESV_LEVEL)
+ mopt->dir_resv_level = result.uint_32;
+ break;
+ case Opt_journal_async_commit:
+ mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
+ break;
+ default:
+ return -EINVAL;
}
- status = 1;
-
-bail:
- return status;
+ return 0;
}
static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
@@ -1569,15 +1516,13 @@ static int __init ocfs2_init(void)
ocfs2_set_locking_protocol();
- status = register_quota_format(&ocfs2_quota_format);
- if (status < 0)
- goto out3;
+ register_quota_format(&ocfs2_quota_format);
+
status = register_filesystem(&ocfs2_fs_type);
if (!status)
return 0;
unregister_quota_format(&ocfs2_quota_format);
-out3:
debugfs_remove(ocfs2_debugfs_root);
ocfs2_free_mem_caches();
out2:
@@ -1706,18 +1651,17 @@ static int ocfs2_initialize_mem_caches(void)
sizeof(struct ocfs2_inode_info),
0,
(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ SLAB_ACCOUNT),
ocfs2_inode_init_once);
ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache",
sizeof(struct ocfs2_dquot),
0,
- (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD),
+ SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
NULL);
ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache",
sizeof(struct ocfs2_quota_chunk),
0,
- (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
+ SLAB_RECLAIM_ACCOUNT,
NULL);
if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep ||
!ocfs2_qf_chunk_cachep) {
@@ -1859,7 +1803,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
osb = OCFS2_SB(sb);
BUG_ON(!osb);
- /* Remove file check sysfs related directores/files,
+ /* Remove file check sysfs related directories/files,
* and wait for the pending file check operations */
ocfs2_filecheck_remove_sysfs(osb);
@@ -1868,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop(osb);
+ /* Stop quota recovery so that we can disable quotas */
+ ocfs2_recovery_disable_quota(osb);
+
ocfs2_disable_quotas(osb);
/* All dquots should be freed by now */
@@ -2015,7 +1962,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
sb->s_fs_info = osb;
sb->s_op = &ocfs2_sops;
- sb->s_d_op = &ocfs2_dentry_ops;
+ set_default_d_op(sb, &ocfs2_dentry_ops);
sb->s_export_op = &ocfs2_export_ops;
sb->s_qcop = &dquot_quotactl_sysfile_ops;
sb->dq_op = &ocfs2_quota_operations;
@@ -2027,8 +1974,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
- memcpy(&sb->s_uuid, di->id2.i_super.s_uuid,
- sizeof(di->id2.i_super.s_uuid));
+ super_set_uuid(sb, di->id2.i_super.s_uuid,
+ sizeof(di->id2.i_super.s_uuid));
osb->osb_dx_mask = (1 << (cbits - bbits)) - 1;
@@ -2320,6 +2267,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
struct ocfs2_blockcheck_stats *stats)
{
int status = -EAGAIN;
+ u32 blksz_bits;
if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
@@ -2334,11 +2282,15 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
goto out;
}
status = -EINVAL;
- if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
+ /* Acceptable block sizes are 512 bytes, 1K, 2K and 4K. */
+ blksz_bits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
+ if (blksz_bits < 9 || blksz_bits > 12) {
+ mlog(ML_ERROR, "found superblock with incorrect block "
+ "size bits: found %u, should be 9, 10, 11, or 12\n",
+ blksz_bits);
+ } else if ((1 << blksz_bits) != blksz) {
mlog(ML_ERROR, "found superblock with incorrect block "
- "size: found %u, should be %u\n",
- 1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits),
- blksz);
+ "size: found %u, should be %u\n", 1 << blksz_bits, blksz);
} else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=
OCFS2_MAJOR_REV_LEVEL ||
le16_to_cpu(di->id2.i_super.s_minor_rev_level) !=
@@ -2356,8 +2308,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
(unsigned long long)bh->b_blocknr);
} else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 ||
le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) {
- mlog(ML_ERROR, "bad cluster size found: %u\n",
- 1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits));
+ mlog(ML_ERROR, "bad cluster size bit found: %u\n",
+ le32_to_cpu(di->id2.i_super.s_clustersize_bits));
} else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) {
mlog(ML_ERROR, "bad root_blkno: 0\n");
} else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) {
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index d4c5fdcfa1e4..ad8be3300b49 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -54,31 +54,27 @@
static int ocfs2_fast_symlink_read_folio(struct file *f, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct buffer_head *bh = NULL;
int status = ocfs2_read_inode_block(inode, &bh);
struct ocfs2_dinode *fe;
const char *link;
- void *kaddr;
size_t len;
if (status < 0) {
mlog_errno(status);
- return status;
+ goto out;
}
fe = (struct ocfs2_dinode *) bh->b_data;
link = (char *) fe->id2.i_symlink;
/* will be less than a page size */
len = strnlen(link, ocfs2_fast_symlink_chars(inode->i_sb));
- kaddr = kmap_atomic(page);
- memcpy(kaddr, link, len + 1);
- kunmap_atomic(kaddr);
- SetPageUptodate(page);
- unlock_page(page);
+ memcpy_to_folio(folio, 0, link, len + 1);
+out:
+ folio_end_read(folio, status == 0);
brelse(bh);
- return 0;
+ return status;
}
const struct address_space_operations ocfs2_fast_symlink_aops = {
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 53a945da873b..d53a6cc866be 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -127,14 +127,14 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
char namebuf[40];
struct inode *inode = NULL;
u64 blkno;
- int status = 0;
+ int len, status = 0;
- ocfs2_sprintf_system_inode_name(namebuf,
- sizeof(namebuf),
- type, slot);
+ len = ocfs2_sprintf_system_inode_name(namebuf,
+ sizeof(namebuf),
+ type, slot);
- status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
- strlen(namebuf), &blkno);
+ status = ocfs2_lookup_ino_from_name(osb->sys_root_inode,
+ namebuf, len, &blkno);
if (status < 0) {
goto bail;
}
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 6510ad783c91..dc1761e84814 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -87,14 +87,14 @@ static struct ocfs2_xattr_def_value_root def_xv = {
.xv.xr_list.l_count = cpu_to_le16(1),
};
-const struct xattr_handler *ocfs2_xattr_handlers[] = {
+const struct xattr_handler * const ocfs2_xattr_handlers[] = {
&ocfs2_xattr_user_handler,
&ocfs2_xattr_trusted_handler,
&ocfs2_xattr_security_handler,
NULL
};
-static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
+static const struct xattr_handler * const ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
[OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access,
[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default,
@@ -648,7 +648,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
* 256(name) + 80(value) + 16(entry) = 352 bytes,
* The max space of acl xattr taken inline is
* 80(value) + 16(entry) * 2(if directory) = 192 bytes,
- * when blocksize = 512, may reserve one more cluser for
+ * when blocksize = 512, may reserve one more cluster for
* xattr bucket, otherwise reserve one metadata block
* for them is ok.
* If this is a new directory with inline data,
@@ -1062,13 +1062,13 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
return i_ret + b_ret;
}
-static int ocfs2_xattr_find_entry(int name_index,
+static int ocfs2_xattr_find_entry(struct inode *inode, int name_index,
const char *name,
struct ocfs2_xattr_search *xs)
{
struct ocfs2_xattr_entry *entry;
size_t name_len;
- int i, cmp = 1;
+ int i, name_offset, cmp = 1;
if (name == NULL)
return -EINVAL;
@@ -1076,13 +1076,22 @@ static int ocfs2_xattr_find_entry(int name_index,
name_len = strlen(name);
entry = xs->here;
for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
+ if ((void *)entry >= xs->end) {
+ ocfs2_error(inode->i_sb, "corrupted xattr entries");
+ return -EFSCORRUPTED;
+ }
cmp = name_index - ocfs2_xattr_get_type(entry);
if (!cmp)
cmp = name_len - entry->xe_name_len;
- if (!cmp)
- cmp = memcmp(name, (xs->base +
- le16_to_cpu(entry->xe_name_offset)),
- name_len);
+ if (!cmp) {
+ name_offset = le16_to_cpu(entry->xe_name_offset);
+ if ((xs->base + name_offset + name_len) > xs->end) {
+ ocfs2_error(inode->i_sb,
+ "corrupted xattr entries");
+ return -EFSCORRUPTED;
+ }
+ cmp = memcmp(name, (xs->base + name_offset), name_len);
+ }
if (cmp == 0)
break;
entry += 1;
@@ -1166,7 +1175,7 @@ static int ocfs2_xattr_ibody_get(struct inode *inode,
xs->base = (void *)xs->header;
xs->here = xs->header->xh_entries;
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
if (ret)
return ret;
size = le64_to_cpu(xs->here->xe_value_size);
@@ -2027,8 +2036,7 @@ static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
rc = 0;
ocfs2_xa_cleanup_value_truncate(loc, "removing",
orig_clusters);
- if (rc)
- goto out;
+ goto out;
}
}
@@ -2698,7 +2706,7 @@ static int ocfs2_xattr_ibody_find(struct inode *inode,
/* Find the named attribute. */
if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
if (ret && ret != -ENODATA)
return ret;
xs->not_found = ret;
@@ -2833,7 +2841,7 @@ static int ocfs2_xattr_block_find(struct inode *inode,
xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
xs->here = xs->header->xh_entries;
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
} else
ret = ocfs2_xattr_index_block_find(inode, blk_bh,
name_index,
@@ -2900,7 +2908,7 @@ static int ocfs2_create_xattr_block(struct inode *inode,
/* Initialize ocfs2_xattr_block */
xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
memset(xblk, 0, inode->i_sb->s_blocksize);
- strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
+ strscpy(xblk->xb_signature, OCFS2_XATTR_BLOCK_SIGNATURE);
xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
@@ -3422,8 +3430,8 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
}
inode_set_ctime_current(inode);
- di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
+ di->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
}
out:
@@ -4158,15 +4166,6 @@ static int cmp_xe(const void *a, const void *b)
return 0;
}
-static void swap_xe(void *a, void *b, int size)
-{
- struct ocfs2_xattr_entry *l = a, *r = b, tmp;
-
- tmp = *l;
- memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
- memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
-}
-
/*
* When the ocfs2_xattr_block is filled up, new bucket will be created
* and all the xattr entries will be moved to the new bucket.
@@ -4232,7 +4231,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
- cmp_xe, swap_xe);
+ cmp_xe, NULL);
}
/*
@@ -4372,7 +4371,7 @@ static int cmp_xe_offset(const void *a, const void *b)
/*
* defrag a xattr bucket if we find that the bucket has some
- * holes beteen name/value pairs.
+ * holes between name/value pairs.
* We will move all the name/value pairs to the end of the bucket
* so that we can spare some space for insertion.
*/
@@ -4427,7 +4426,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
*/
sort(entries, le16_to_cpu(xh->xh_count),
sizeof(struct ocfs2_xattr_entry),
- cmp_xe_offset, swap_xe);
+ cmp_xe_offset, NULL);
/* Move all name/values to the end of the bucket. */
xe = xh->xh_entries;
@@ -4469,7 +4468,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
/* sort the entries by their name_hash. */
sort(entries, le16_to_cpu(xh->xh_count),
sizeof(struct ocfs2_xattr_entry),
- cmp_xe, swap_xe);
+ cmp_xe, NULL);
buf = bucket_buf;
for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
@@ -5012,7 +5011,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
* 2. If cluster_size == bucket_size:
* a) If the previous extent rec has more than one cluster and the insert
* place isn't in the last cluster, copy the entire last cluster to the
- * new one. This time, we don't need to upate the first_bh and header_bh
+ * new one. This time, we don't need to update the first_bh and header_bh
* since they will not be moved into the new cluster.
* b) Otherwise, move the bottom half of the xattrs in the last cluster into
* the new one. And we set the extend flag to zero if the insert place is
@@ -6190,7 +6189,7 @@ struct ocfs2_xattr_reflink {
/*
* Given a xattr header and xe offset,
* return the proper xv and the corresponding bh.
- * xattr in inode, block and xattr tree have different implementaions.
+ * xattr in inode, block and xattr tree have different implementations.
*/
typedef int (get_xattr_value_root)(struct super_block *sb,
struct buffer_head *bh,
@@ -6270,7 +6269,7 @@ static int ocfs2_get_xattr_value_root(struct super_block *sb,
}
/*
- * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
+ * Lock the meta_ac and calculate how much credits we need for reflink xattrs.
* It is only used for inline xattr and xattr block.
*/
static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
@@ -6352,7 +6351,7 @@ static int ocfs2_reflink_xattr_header(handle_t *handle,
trace_ocfs2_reflink_xattr_header((unsigned long long)old_bh->b_blocknr,
le16_to_cpu(xh->xh_count));
- last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
+ last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)] - 1;
for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
xe = &xh->xh_entries[i];
@@ -6511,16 +6510,7 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
}
new_oi = OCFS2_I(args->new_inode);
- /*
- * Adjust extent record count to reserve space for extended attribute.
- * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
- */
- if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
- !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
- struct ocfs2_extent_list *el = &new_di->id2.i_list;
- le16_add_cpu(&el->l_count, -(inline_size /
- sizeof(struct ocfs2_extent_rec)));
- }
+
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 00308b57f64f..65e9aa743919 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -30,7 +30,7 @@ struct ocfs2_security_xattr_info {
extern const struct xattr_handler ocfs2_xattr_user_handler;
extern const struct xattr_handler ocfs2_xattr_trusted_handler;
extern const struct xattr_handler ocfs2_xattr_security_handler;
-extern const struct xattr_handler *ocfs2_xattr_handlers[];
+extern const struct xattr_handler * const ocfs2_xattr_handlers[];
ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int,