summaryrefslogtreecommitdiff
path: root/fs/ocfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/alloc.c168
-rw-r--r--fs/ocfs2/alloc.h8
-rw-r--r--fs/ocfs2/aops.c376
-rw-r--r--fs/ocfs2/aops.h19
-rw-r--r--fs/ocfs2/buffer_head_io.c4
-rw-r--r--fs/ocfs2/cluster/heartbeat.c56
-rw-r--r--fs/ocfs2/cluster/masklog.h2
-rw-r--r--fs/ocfs2/cluster/quorum.c8
-rw-r--r--fs/ocfs2/cluster/tcp.c17
-rw-r--r--fs/ocfs2/dcache.c14
-rw-r--r--fs/ocfs2/dir.c86
-rw-r--r--fs/ocfs2/dlm/dlmapi.h4
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c62
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c12
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c12
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c13
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c33
-rw-r--r--fs/ocfs2/dlmglue.c67
-rw-r--r--fs/ocfs2/dlmglue.h6
-rw-r--r--fs/ocfs2/export.c13
-rw-r--r--fs/ocfs2/extent_map.c18
-rw-r--r--fs/ocfs2/file.c61
-rw-r--r--fs/ocfs2/file.h1
-rw-r--r--fs/ocfs2/filecheck.c2
-rw-r--r--fs/ocfs2/inode.c30
-rw-r--r--fs/ocfs2/ioctl.c3
-rw-r--r--fs/ocfs2/journal.c300
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/localalloc.c30
-rw-r--r--fs/ocfs2/mmap.c22
-rw-r--r--fs/ocfs2/move_extents.c10
-rw-r--r--fs/ocfs2/namei.c27
-rw-r--r--fs/ocfs2/ocfs2.h46
-rw-r--r--fs/ocfs2/ocfs2_fs.h11
-rw-r--r--fs/ocfs2/ocfs2_ioctl.h2
-rw-r--r--fs/ocfs2/ocfs2_lockid.h2
-rw-r--r--fs/ocfs2/ocfs2_trace.h80
-rw-r--r--fs/ocfs2/quota.h1
-rw-r--r--fs/ocfs2/quota_global.c24
-rw-r--r--fs/ocfs2/quota_local.c20
-rw-r--r--fs/ocfs2/refcounttree.c82
-rw-r--r--fs/ocfs2/reservations.c2
-rw-r--r--fs/ocfs2/reservations.h4
-rw-r--r--fs/ocfs2/resize.c10
-rw-r--r--fs/ocfs2/stack_o2cb.c4
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/stackglue.c5
-rw-r--r--fs/ocfs2/stackglue.h4
-rw-r--r--fs/ocfs2/suballoc.c155
-rw-r--r--fs/ocfs2/suballoc.h7
-rw-r--r--fs/ocfs2/super.c621
-rw-r--r--fs/ocfs2/symlink.c16
-rw-r--r--fs/ocfs2/xattr.c66
53 files changed, 1439 insertions, 1212 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ea9127ba3208..821cb7874685 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -566,7 +566,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle,
struct ocfs2_path *path,
struct ocfs2_extent_rec *insert_rec);
/*
- * Reset the actual path elements so that we can re-use the structure
+ * Reset the actual path elements so that we can reuse the structure
* to build another path. Generally, this involves freeing the buffer
* heads.
*/
@@ -1182,7 +1182,7 @@ static int ocfs2_add_branch(handle_t *handle,
/*
* If there is a gap before the root end and the real end
- * of the righmost leaf block, we need to remove the gap
+ * of the rightmost leaf block, we need to remove the gap
* between new_cpos and root_end first so that the tree
* is consistent after we add a new branch(it will start
* from new_cpos).
@@ -1238,7 +1238,7 @@ static int ocfs2_add_branch(handle_t *handle,
/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
* linked with the rest of the tree.
- * conversly, new_eb_bhs[0] is the new bottommost leaf.
+ * conversely, new_eb_bhs[0] is the new bottommost leaf.
*
* when we leave the loop, new_last_eb_blk will point to the
* newest leaf, and next_blkno will point to the topmost extent
@@ -1803,6 +1803,14 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
el = root_el;
while (el->l_tree_depth) {
+ if (unlikely(le16_to_cpu(el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH)) {
+ ocfs2_error(ocfs2_metadata_cache_get_super(ci),
+ "Owner %llu has invalid tree depth %u in extent list\n",
+ (unsigned long long)ocfs2_metadata_cache_owner(ci),
+ le16_to_cpu(el->l_tree_depth));
+ ret = -EROFS;
+ goto out;
+ }
if (le16_to_cpu(el->l_next_free_rec) == 0) {
ocfs2_error(ocfs2_metadata_cache_get_super(ci),
"Owner %llu has empty extent list at depth %u\n",
@@ -3712,7 +3720,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
* update split_index here.
*
* When the split_index is zero, we need to merge it to the
- * prevoius extent block. It is more efficient and easier
+ * previous extent block. It is more efficient and easier
* if we do merge_right first and merge_left later.
*/
ret = ocfs2_merge_rec_right(path, handle, et, split_rec,
@@ -4517,7 +4525,7 @@ static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
}
/*
- * This should only be called against the righmost leaf extent list.
+ * This should only be called against the rightmost leaf extent list.
*
* ocfs2_figure_appending_type() will figure out whether we'll have to
* insert at the tail of the rightmost leaf.
@@ -4767,7 +4775,7 @@ bail:
}
/*
- * Allcate and add clusters into the extent b-tree.
+ * Allocate and add clusters into the extent b-tree.
* The new clusters(clusters_to_add) will be inserted at logical_offset.
* The extent b-tree's root is specified by et, and
* it is not limited to the file storage. Any extent tree can use this
@@ -6154,6 +6162,9 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
int status;
struct inode *inode = NULL;
struct buffer_head *bh = NULL;
+ struct ocfs2_dinode *di;
+ struct ocfs2_truncate_log *tl;
+ unsigned int tl_count;
inode = ocfs2_get_system_file_inode(osb,
TRUNCATE_LOG_SYSTEM_INODE,
@@ -6171,6 +6182,18 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
goto bail;
}
+ di = (struct ocfs2_dinode *)bh->b_data;
+ tl = &di->id2.i_dealloc;
+ tl_count = le16_to_cpu(tl->tl_count);
+ if (unlikely(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) ||
+ tl_count == 0)) {
+ status = -EFSCORRUPTED;
+ iput(inode);
+ brelse(bh);
+ mlog_errno(status);
+ goto bail;
+ }
+
*tl_inode = inode;
*tl_bh = bh;
bail:
@@ -6808,27 +6831,27 @@ static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
return 0;
}
-void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
- unsigned int from, unsigned int to,
- struct page *page, int zero, u64 *phys)
+void ocfs2_map_and_dirty_folio(struct inode *inode, handle_t *handle,
+ size_t from, size_t to, struct folio *folio, int zero,
+ u64 *phys)
{
int ret, partial = 0;
- loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from;
+ loff_t start_byte = folio_pos(folio) + from;
loff_t length = to - from;
- ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
+ ret = ocfs2_map_folio_blocks(folio, phys, inode, from, to, 0);
if (ret)
mlog_errno(ret);
if (zero)
- zero_user_segment(page, from, to);
+ folio_zero_segment(folio, from, to);
/*
* Need to set the buffers we zero'd into uptodate
* here if they aren't - ocfs2_map_page_blocks()
* might've skipped some
*/
- ret = walk_page_buffers(handle, page_buffers(page),
+ ret = walk_page_buffers(handle, folio_buffers(folio),
from, to, &partial,
ocfs2_zero_func);
if (ret < 0)
@@ -6841,92 +6864,89 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
}
if (!partial)
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
}
-static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
- loff_t end, struct page **pages,
- int numpages, u64 phys, handle_t *handle)
+static void ocfs2_zero_cluster_folios(struct inode *inode, loff_t start,
+ loff_t end, struct folio **folios, int numfolios,
+ u64 phys, handle_t *handle)
{
int i;
- struct page *page;
- unsigned int from, to = PAGE_SIZE;
struct super_block *sb = inode->i_sb;
BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
- if (numpages == 0)
+ if (numfolios == 0)
goto out;
- to = PAGE_SIZE;
- for(i = 0; i < numpages; i++) {
- page = pages[i];
+ for (i = 0; i < numfolios; i++) {
+ struct folio *folio = folios[i];
+ size_t to = folio_size(folio);
+ size_t from = offset_in_folio(folio, start);
- from = start & (PAGE_SIZE - 1);
- if ((end >> PAGE_SHIFT) == page->index)
- to = end & (PAGE_SIZE - 1);
+ if (to > end - folio_pos(folio))
+ to = end - folio_pos(folio);
- BUG_ON(from > PAGE_SIZE);
- BUG_ON(to > PAGE_SIZE);
+ ocfs2_map_and_dirty_folio(inode, handle, from, to, folio, 1,
+ &phys);
- ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1,
- &phys);
-
- start = (page->index + 1) << PAGE_SHIFT;
+ start = folio_next_index(folio) << PAGE_SHIFT;
}
out:
- if (pages)
- ocfs2_unlock_and_free_pages(pages, numpages);
+ if (folios)
+ ocfs2_unlock_and_free_folios(folios, numfolios);
}
-int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
- struct page **pages, int *num)
+static int ocfs2_grab_folios(struct inode *inode, loff_t start, loff_t end,
+ struct folio **folios, int *num)
{
- int numpages, ret = 0;
+ int numfolios, ret = 0;
struct address_space *mapping = inode->i_mapping;
unsigned long index;
loff_t last_page_bytes;
BUG_ON(start > end);
- numpages = 0;
+ numfolios = 0;
last_page_bytes = PAGE_ALIGN(end);
index = start >> PAGE_SHIFT;
do {
- pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
- if (!pages[numpages]) {
- ret = -ENOMEM;
+ folios[numfolios] = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folios[numfolios])) {
+ ret = PTR_ERR(folios[numfolios]);
mlog_errno(ret);
+ folios[numfolios] = NULL;
goto out;
}
- numpages++;
- index++;
+ index = folio_next_index(folios[numfolios]);
+ numfolios++;
} while (index < (last_page_bytes >> PAGE_SHIFT));
out:
if (ret != 0) {
- if (pages)
- ocfs2_unlock_and_free_pages(pages, numpages);
- numpages = 0;
+ if (folios)
+ ocfs2_unlock_and_free_folios(folios, numfolios);
+ numfolios = 0;
}
- *num = numpages;
+ *num = numfolios;
return ret;
}
-static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
- struct page **pages, int *num)
+static int ocfs2_grab_eof_folios(struct inode *inode, loff_t start, loff_t end,
+ struct folio **folios, int *num)
{
struct super_block *sb = inode->i_sb;
BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
(end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
- return ocfs2_grab_pages(inode, start, end, pages, num);
+ return ocfs2_grab_folios(inode, start, end, folios, num);
}
/*
@@ -6940,8 +6960,8 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
u64 range_start, u64 range_end)
{
- int ret = 0, numpages;
- struct page **pages = NULL;
+ int ret = 0, numfolios;
+ struct folio **folios = NULL;
u64 phys;
unsigned int ext_flags;
struct super_block *sb = inode->i_sb;
@@ -6954,17 +6974,17 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
return 0;
/*
- * Avoid zeroing pages fully beyond current i_size. It is pointless as
- * underlying blocks of those pages should be already zeroed out and
+ * Avoid zeroing folios fully beyond current i_size. It is pointless as
+ * underlying blocks of those folios should be already zeroed out and
* page writeback will skip them anyway.
*/
range_end = min_t(u64, range_end, i_size_read(inode));
if (range_start >= range_end)
return 0;
- pages = kcalloc(ocfs2_pages_per_cluster(sb),
- sizeof(struct page *), GFP_NOFS);
- if (pages == NULL) {
+ folios = kcalloc(ocfs2_pages_per_cluster(sb),
+ sizeof(struct folio *), GFP_NOFS);
+ if (folios == NULL) {
ret = -ENOMEM;
mlog_errno(ret);
goto out;
@@ -6985,18 +7005,18 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
if (phys == 0 || ext_flags & OCFS2_EXT_UNWRITTEN)
goto out;
- ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages,
- &numpages);
+ ret = ocfs2_grab_eof_folios(inode, range_start, range_end, folios,
+ &numfolios);
if (ret) {
mlog_errno(ret);
goto out;
}
- ocfs2_zero_cluster_pages(inode, range_start, range_end, pages,
- numpages, phys, handle);
+ ocfs2_zero_cluster_folios(inode, range_start, range_end, folios,
+ numfolios, phys, handle);
/*
- * Initiate writeout of the pages we zero'd here. We don't
+ * Initiate writeout of the folios we zero'd here. We don't
* wait on them - the truncate_inode_pages() call later will
* do that for us.
*/
@@ -7006,7 +7026,7 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
mlog_errno(ret);
out:
- kfree(pages);
+ kfree(folios);
return ret;
}
@@ -7059,7 +7079,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh)
{
- int ret, has_data, num_pages = 0;
+ int ret, has_data, num_folios = 0;
int need_free = 0;
u32 bit_off, num;
handle_t *handle;
@@ -7068,7 +7088,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_alloc_context *data_ac = NULL;
- struct page *page = NULL;
+ struct folio *folio = NULL;
struct ocfs2_extent_tree et;
int did_quota = 0;
@@ -7119,12 +7139,12 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
/*
* Save two copies, one for insert, and one that can
- * be changed by ocfs2_map_and_dirty_page() below.
+ * be changed by ocfs2_map_and_dirty_folio() below.
*/
block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
- ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page,
- &num_pages);
+ ret = ocfs2_grab_eof_folios(inode, 0, page_end, &folio,
+ &num_folios);
if (ret) {
mlog_errno(ret);
need_free = 1;
@@ -7135,15 +7155,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
* This should populate the 1st page for us and mark
* it up to date.
*/
- ret = ocfs2_read_inline_data(inode, page, di_bh);
+ ret = ocfs2_read_inline_data(inode, folio, di_bh);
if (ret) {
mlog_errno(ret);
need_free = 1;
goto out_unlock;
}
- ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0,
- &phys);
+ ocfs2_map_and_dirty_folio(inode, handle, 0, page_end, folio, 0,
+ &phys);
}
spin_lock(&oi->ip_lock);
@@ -7174,8 +7194,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
out_unlock:
- if (page)
- ocfs2_unlock_and_free_pages(&page, num_pages);
+ if (folio)
+ ocfs2_unlock_and_free_folios(&folio, num_folios);
out_commit:
if (ret < 0 && did_quota)
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 4af7abaa6e40..1c0c83362904 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -254,11 +254,9 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
return !rec->e_leaf_clusters;
}
-int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
- struct page **pages, int *num);
-void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
- unsigned int from, unsigned int to,
- struct page *page, int zero, u64 *phys);
+void ocfs2_map_and_dirty_folio(struct inode *inode, handle_t *handle,
+ size_t from, size_t to, struct folio *folio, int zero,
+ u64 *phys);
/*
* Structures which describe a path through a btree, and functions to
* manipulate them.
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b82185075de7..40b6bce12951 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -46,7 +46,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh = NULL;
struct buffer_head *buffer_cache_bh = NULL;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- void *kaddr;
trace_ocfs2_symlink_get_block(
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -91,17 +90,11 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
* could've happened. Since we've got a reference on
* the bh, even if it commits while we're doing the
* copy, the data is still good. */
- if (buffer_jbd(buffer_cache_bh)
- && ocfs2_inode_is_new(inode)) {
- kaddr = kmap_atomic(bh_result->b_page);
- if (!kaddr) {
- mlog(ML_ERROR, "couldn't kmap!\n");
- goto bail;
- }
- memcpy(kaddr + (bh_result->b_size * iblock),
- buffer_cache_bh->b_data,
- bh_result->b_size);
- kunmap_atomic(kaddr);
+ if (buffer_jbd(buffer_cache_bh) && ocfs2_inode_is_new(inode)) {
+ memcpy_to_folio(bh_result->b_folio,
+ bh_result->b_size * iblock,
+ buffer_cache_bh->b_data,
+ bh_result->b_size);
set_buffer_uptodate(bh_result);
}
brelse(buffer_cache_bh);
@@ -156,9 +149,8 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
err = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno, &count,
&ext_flags);
if (err) {
- mlog(ML_ERROR, "Error %d from get_blocks(0x%p, %llu, 1, "
- "%llu, NULL)\n", err, inode, (unsigned long long)iblock,
- (unsigned long long)p_blkno);
+ mlog(ML_ERROR, "get_blocks() failed, inode: 0x%p, "
+ "block: %llu\n", inode, (unsigned long long)iblock);
goto bail;
}
@@ -216,10 +208,9 @@ bail:
return err;
}
-int ocfs2_read_inline_data(struct inode *inode, struct page *page,
+int ocfs2_read_inline_data(struct inode *inode, struct folio *folio,
struct buffer_head *di_bh)
{
- void *kaddr;
loff_t size;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -231,7 +222,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
size = i_size_read(inode);
- if (size > PAGE_SIZE ||
+ if (size > folio_size(folio) ||
size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
ocfs2_error(inode->i_sb,
"Inode %llu has with inline data has bad size: %Lu\n",
@@ -240,25 +231,18 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
return -EROFS;
}
- kaddr = kmap_atomic(page);
- if (size)
- memcpy(kaddr, di->id2.i_data.id_data, size);
- /* Clear the remaining part of the page */
- memset(kaddr + size, 0, PAGE_SIZE - size);
- flush_dcache_page(page);
- kunmap_atomic(kaddr);
-
- SetPageUptodate(page);
+ folio_fill_tail(folio, 0, di->id2.i_data.id_data, size);
+ folio_mark_uptodate(folio);
return 0;
}
-static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
+static int ocfs2_readpage_inline(struct inode *inode, struct folio *folio)
{
int ret;
struct buffer_head *di_bh = NULL;
- BUG_ON(!PageLocked(page));
+ BUG_ON(!folio_test_locked(folio));
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
ret = ocfs2_read_inode_block(inode, &di_bh);
@@ -267,9 +251,9 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
goto out;
}
- ret = ocfs2_read_inline_data(inode, page, di_bh);
+ ret = ocfs2_read_inline_data(inode, folio, di_bh);
out:
- unlock_page(page);
+ folio_unlock(folio);
brelse(di_bh);
return ret;
@@ -284,7 +268,7 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio)
trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, folio->index);
- ret = ocfs2_inode_lock_with_page(inode, NULL, 0, &folio->page);
+ ret = ocfs2_inode_lock_with_folio(inode, NULL, 0, folio);
if (ret != 0) {
if (ret == AOP_TRUNCATED_PAGE)
unlock = 0;
@@ -306,7 +290,7 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio)
}
/*
- * i_size might have just been updated as we grabed the meta lock. We
+ * i_size might have just been updated as we grabbed the meta lock. We
* might now be discovering a truncate that hit on another node.
* block_read_full_folio->get_block freaks out if it is asked to read
* beyond the end of a file, so we check here. Callers
@@ -323,7 +307,7 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio)
}
if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
- ret = ocfs2_readpage_inline(inode, &folio->page);
+ ret = ocfs2_readpage_inline(inode, folio);
else
ret = block_read_full_folio(folio, ocfs2_get_block);
unlock = 0;
@@ -535,7 +519,7 @@ static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
*
* from == to == 0 is code for "zero the entire cluster region"
*/
-static void ocfs2_clear_page_regions(struct page *page,
+static void ocfs2_clear_folio_regions(struct folio *folio,
struct ocfs2_super *osb, u32 cpos,
unsigned from, unsigned to)
{
@@ -544,7 +528,7 @@ static void ocfs2_clear_page_regions(struct page *page,
ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end);
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_folio(folio, 0);
if (from || to) {
if (from > cluster_start)
@@ -555,13 +539,13 @@ static void ocfs2_clear_page_regions(struct page *page,
memset(kaddr + cluster_start, 0, cluster_end - cluster_start);
}
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
}
/*
* Nonsparse file systems fully allocate before we get to the write
* code. This prevents ocfs2_write() from tagging the write as an
- * allocating one, which means ocfs2_map_page_blocks() might try to
+ * allocating one, which means ocfs2_map_folio_blocks() might try to
* read-in the blocks at the tail of our file. Avoid reading them by
* testing i_size against each block offset.
*/
@@ -586,11 +570,10 @@ static int ocfs2_should_read_blk(struct inode *inode, struct folio *folio,
*
* This will also skip zeroing, which is handled externally.
*/
-int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
+int ocfs2_map_folio_blocks(struct folio *folio, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new)
{
- struct folio *folio = page_folio(page);
int ret = 0;
struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
unsigned int block_end, block_start;
@@ -730,24 +713,24 @@ struct ocfs2_write_ctxt {
unsigned int w_large_pages;
/*
- * Pages involved in this write.
+ * Folios involved in this write.
*
- * w_target_page is the page being written to by the user.
+ * w_target_folio is the folio being written to by the user.
*
- * w_pages is an array of pages which always contains
- * w_target_page, and in the case of an allocating write with
+ * w_folios is an array of folios which always contains
+ * w_target_folio, and in the case of an allocating write with
* page_size < cluster size, it will contain zero'd and mapped
- * pages adjacent to w_target_page which need to be written
+ * pages adjacent to w_target_folio which need to be written
* out in so that future reads from that region will get
* zero's.
*/
- unsigned int w_num_pages;
- struct page *w_pages[OCFS2_MAX_CTXT_PAGES];
- struct page *w_target_page;
+ unsigned int w_num_folios;
+ struct folio *w_folios[OCFS2_MAX_CTXT_PAGES];
+ struct folio *w_target_folio;
/*
* w_target_locked is used for page_mkwrite path indicating no unlocking
- * against w_target_page in ocfs2_write_end_nolock.
+ * against w_target_folio in ocfs2_write_end_nolock.
*/
unsigned int w_target_locked:1;
@@ -772,40 +755,40 @@ struct ocfs2_write_ctxt {
unsigned int w_unwritten_count;
};
-void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
+void ocfs2_unlock_and_free_folios(struct folio **folios, int num_folios)
{
int i;
- for(i = 0; i < num_pages; i++) {
- if (pages[i]) {
- unlock_page(pages[i]);
- mark_page_accessed(pages[i]);
- put_page(pages[i]);
- }
+ for(i = 0; i < num_folios; i++) {
+ if (!folios[i])
+ continue;
+ folio_unlock(folios[i]);
+ folio_mark_accessed(folios[i]);
+ folio_put(folios[i]);
}
}
-static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_folios(struct ocfs2_write_ctxt *wc)
{
int i;
/*
* w_target_locked is only set to true in the page_mkwrite() case.
* The intent is to allow us to lock the target page from write_begin()
- * to write_end(). The caller must hold a ref on w_target_page.
+ * to write_end(). The caller must hold a ref on w_target_folio.
*/
if (wc->w_target_locked) {
- BUG_ON(!wc->w_target_page);
- for (i = 0; i < wc->w_num_pages; i++) {
- if (wc->w_target_page == wc->w_pages[i]) {
- wc->w_pages[i] = NULL;
+ BUG_ON(!wc->w_target_folio);
+ for (i = 0; i < wc->w_num_folios; i++) {
+ if (wc->w_target_folio == wc->w_folios[i]) {
+ wc->w_folios[i] = NULL;
break;
}
}
- mark_page_accessed(wc->w_target_page);
- put_page(wc->w_target_page);
+ folio_mark_accessed(wc->w_target_folio);
+ folio_put(wc->w_target_folio);
}
- ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+ ocfs2_unlock_and_free_folios(wc->w_folios, wc->w_num_folios);
}
static void ocfs2_free_unwritten_list(struct inode *inode,
@@ -827,7 +810,7 @@ static void ocfs2_free_write_ctxt(struct inode *inode,
struct ocfs2_write_ctxt *wc)
{
ocfs2_free_unwritten_list(inode, &wc->w_unwritten_list);
- ocfs2_unlock_pages(wc);
+ ocfs2_unlock_folios(wc);
brelse(wc->w_di_bh);
kfree(wc);
}
@@ -870,29 +853,30 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
* and dirty so they'll be written out (in order to prevent uninitialised
* block data from leaking). And clear the new bit.
*/
-static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+static void ocfs2_zero_new_buffers(struct folio *folio, size_t from, size_t to)
{
unsigned int block_start, block_end;
struct buffer_head *head, *bh;
- BUG_ON(!PageLocked(page));
- if (!page_has_buffers(page))
+ BUG_ON(!folio_test_locked(folio));
+ head = folio_buffers(folio);
+ if (!head)
return;
- bh = head = page_buffers(page);
+ bh = head;
block_start = 0;
do {
block_end = block_start + bh->b_size;
if (buffer_new(bh)) {
if (block_end > from && block_start < to) {
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
unsigned start, end;
start = max(from, block_start);
end = min(to, block_end);
- zero_user_segment(page, start, end);
+ folio_zero_segment(folio, start, end);
set_buffer_uptodate(bh);
}
@@ -917,29 +901,26 @@ static void ocfs2_write_failure(struct inode *inode,
int i;
unsigned from = user_pos & (PAGE_SIZE - 1),
to = user_pos + user_len;
- struct page *tmppage;
- if (wc->w_target_page)
- ocfs2_zero_new_buffers(wc->w_target_page, from, to);
+ if (wc->w_target_folio)
+ ocfs2_zero_new_buffers(wc->w_target_folio, from, to);
- for(i = 0; i < wc->w_num_pages; i++) {
- tmppage = wc->w_pages[i];
+ for (i = 0; i < wc->w_num_folios; i++) {
+ struct folio *folio = wc->w_folios[i];
- if (tmppage && page_has_buffers(tmppage)) {
+ if (folio && folio_buffers(folio)) {
if (ocfs2_should_order_data(inode))
ocfs2_jbd2_inode_add_write(wc->w_handle, inode,
user_pos, user_len);
- block_commit_write(tmppage, from, to);
+ block_commit_write(folio, from, to);
}
}
}
-static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
- struct ocfs2_write_ctxt *wc,
- struct page *page, u32 cpos,
- loff_t user_pos, unsigned user_len,
- int new)
+static int ocfs2_prepare_folio_for_write(struct inode *inode, u64 *p_blkno,
+ struct ocfs2_write_ctxt *wc, struct folio *folio, u32 cpos,
+ loff_t user_pos, unsigned user_len, int new)
{
int ret;
unsigned int map_from = 0, map_to = 0;
@@ -952,20 +933,19 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
/* treat the write as new if the a hole/lseek spanned across
* the page boundary.
*/
- new = new | ((i_size_read(inode) <= page_offset(page)) &&
- (page_offset(page) <= user_pos));
+ new = new | ((i_size_read(inode) <= folio_pos(folio)) &&
+ (folio_pos(folio) <= user_pos));
- if (page == wc->w_target_page) {
+ if (folio == wc->w_target_folio) {
map_from = user_pos & (PAGE_SIZE - 1);
map_to = map_from + user_len;
if (new)
- ret = ocfs2_map_page_blocks(page, p_blkno, inode,
- cluster_start, cluster_end,
- new);
+ ret = ocfs2_map_folio_blocks(folio, p_blkno, inode,
+ cluster_start, cluster_end, new);
else
- ret = ocfs2_map_page_blocks(page, p_blkno, inode,
- map_from, map_to, new);
+ ret = ocfs2_map_folio_blocks(folio, p_blkno, inode,
+ map_from, map_to, new);
if (ret) {
mlog_errno(ret);
goto out;
@@ -979,7 +959,7 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
}
} else {
/*
- * If we haven't allocated the new page yet, we
+ * If we haven't allocated the new folio yet, we
* shouldn't be writing it out without copying user
* data. This is likely a math error from the caller.
*/
@@ -988,8 +968,8 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
map_from = cluster_start;
map_to = cluster_end;
- ret = ocfs2_map_page_blocks(page, p_blkno, inode,
- cluster_start, cluster_end, new);
+ ret = ocfs2_map_folio_blocks(folio, p_blkno, inode,
+ cluster_start, cluster_end, new);
if (ret) {
mlog_errno(ret);
goto out;
@@ -997,20 +977,20 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
}
/*
- * Parts of newly allocated pages need to be zero'd.
+ * Parts of newly allocated folios need to be zero'd.
*
* Above, we have also rewritten 'to' and 'from' - as far as
* the rest of the function is concerned, the entire cluster
- * range inside of a page needs to be written.
+ * range inside of a folio needs to be written.
*
- * We can skip this if the page is up to date - it's already
+ * We can skip this if the folio is uptodate - it's already
* been zero'd from being read in as a hole.
*/
- if (new && !PageUptodate(page))
- ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb),
+ if (new && !folio_test_uptodate(folio))
+ ocfs2_clear_folio_regions(folio, OCFS2_SB(inode->i_sb),
cpos, user_data_from, user_data_to);
- flush_dcache_page(page);
+ flush_dcache_folio(folio);
out:
return ret;
@@ -1019,11 +999,9 @@ out:
/*
* This function will only grab one clusters worth of pages.
*/
-static int ocfs2_grab_pages_for_write(struct address_space *mapping,
- struct ocfs2_write_ctxt *wc,
- u32 cpos, loff_t user_pos,
- unsigned user_len, int new,
- struct page *mmap_page)
+static int ocfs2_grab_folios_for_write(struct address_space *mapping,
+ struct ocfs2_write_ctxt *wc, u32 cpos, loff_t user_pos,
+ unsigned user_len, int new, struct folio *mmap_folio)
{
int ret = 0, i;
unsigned long start, target_index, end_index, index;
@@ -1040,7 +1018,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
* last page of the write.
*/
if (new) {
- wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
+ wc->w_num_folios = ocfs2_pages_per_cluster(inode->i_sb);
start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
/*
* We need the index *past* the last page we could possibly
@@ -1050,15 +1028,15 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
last_byte = max(user_pos + user_len, i_size_read(inode));
BUG_ON(last_byte < 1);
end_index = ((last_byte - 1) >> PAGE_SHIFT) + 1;
- if ((start + wc->w_num_pages) > end_index)
- wc->w_num_pages = end_index - start;
+ if ((start + wc->w_num_folios) > end_index)
+ wc->w_num_folios = end_index - start;
} else {
- wc->w_num_pages = 1;
+ wc->w_num_folios = 1;
start = target_index;
}
end_index = (user_pos + user_len - 1) >> PAGE_SHIFT;
- for(i = 0; i < wc->w_num_pages; i++) {
+ for(i = 0; i < wc->w_num_folios; i++) {
index = start + i;
if (index >= target_index && index <= end_index &&
@@ -1068,37 +1046,38 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
* and wants us to directly use the page
* passed in.
*/
- lock_page(mmap_page);
+ folio_lock(mmap_folio);
/* Exit and let the caller retry */
- if (mmap_page->mapping != mapping) {
- WARN_ON(mmap_page->mapping);
- unlock_page(mmap_page);
+ if (mmap_folio->mapping != mapping) {
+ WARN_ON(mmap_folio->mapping);
+ folio_unlock(mmap_folio);
ret = -EAGAIN;
goto out;
}
- get_page(mmap_page);
- wc->w_pages[i] = mmap_page;
+ folio_get(mmap_folio);
+ wc->w_folios[i] = mmap_folio;
wc->w_target_locked = true;
} else if (index >= target_index && index <= end_index &&
wc->w_type == OCFS2_WRITE_DIRECT) {
/* Direct write has no mapping page. */
- wc->w_pages[i] = NULL;
+ wc->w_folios[i] = NULL;
continue;
} else {
- wc->w_pages[i] = find_or_create_page(mapping, index,
- GFP_NOFS);
- if (!wc->w_pages[i]) {
- ret = -ENOMEM;
+ wc->w_folios[i] = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ GFP_NOFS);
+ if (IS_ERR(wc->w_folios[i])) {
+ ret = PTR_ERR(wc->w_folios[i]);
mlog_errno(ret);
goto out;
}
}
- wait_for_stable_page(wc->w_pages[i]);
+ folio_wait_stable(wc->w_folios[i]);
if (index == target_index)
- wc->w_target_page = wc->w_pages[i];
+ wc->w_target_folio = wc->w_folios[i];
}
out:
if (ret)
@@ -1182,19 +1161,18 @@ static int ocfs2_write_cluster(struct address_space *mapping,
if (!should_zero)
p_blkno += (user_pos >> inode->i_sb->s_blocksize_bits) & (u64)(bpc - 1);
- for(i = 0; i < wc->w_num_pages; i++) {
+ for (i = 0; i < wc->w_num_folios; i++) {
int tmpret;
/* This is the direct io target page. */
- if (wc->w_pages[i] == NULL) {
- p_blkno++;
+ if (wc->w_folios[i] == NULL) {
+ p_blkno += (1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits));
continue;
}
- tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
- wc->w_pages[i], cpos,
- user_pos, user_len,
- should_zero);
+ tmpret = ocfs2_prepare_folio_for_write(inode, &p_blkno, wc,
+ wc->w_folios[i], cpos, user_pos, user_len,
+ should_zero);
if (tmpret) {
mlog_errno(tmpret);
if (ret == 0)
@@ -1473,7 +1451,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
{
int ret;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct page *page;
+ struct folio *folio;
handle_t *handle;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
@@ -1484,19 +1462,21 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
goto out;
}
- page = find_or_create_page(mapping, 0, GFP_NOFS);
- if (!page) {
+ folio = __filemap_get_folio(mapping, 0,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folio)) {
ocfs2_commit_trans(osb, handle);
- ret = -ENOMEM;
+ ret = PTR_ERR(folio);
mlog_errno(ret);
goto out;
}
/*
- * If we don't set w_num_pages then this page won't get unlocked
+ * If we don't set w_num_folios then this folio won't get unlocked
* and freed on cleanup of the write context.
*/
- wc->w_pages[0] = wc->w_target_page = page;
- wc->w_num_pages = 1;
+ wc->w_target_folio = folio;
+ wc->w_folios[0] = folio;
+ wc->w_num_folios = 1;
ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1510,8 +1490,8 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
ocfs2_set_inode_data_inline(inode, di);
- if (!PageUptodate(page)) {
- ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh);
+ if (!folio_test_uptodate(folio)) {
+ ret = ocfs2_read_inline_data(inode, folio, wc->w_di_bh);
if (ret) {
ocfs2_commit_trans(osb, handle);
@@ -1534,9 +1514,8 @@ int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size)
}
static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
- struct inode *inode, loff_t pos,
- unsigned len, struct page *mmap_page,
- struct ocfs2_write_ctxt *wc)
+ struct inode *inode, loff_t pos, size_t len,
+ struct folio *mmap_folio, struct ocfs2_write_ctxt *wc)
{
int ret, written = 0;
loff_t end = pos + len;
@@ -1551,7 +1530,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
* Handle inodes which already have inline data 1st.
*/
if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
- if (mmap_page == NULL &&
+ if (mmap_folio == NULL &&
ocfs2_size_fits_inline_data(wc->w_di_bh, end))
goto do_inline_write;
@@ -1575,7 +1554,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
* Check whether the write can fit.
*/
di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
- if (mmap_page ||
+ if (mmap_folio ||
end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di))
return 0;
@@ -1642,9 +1621,9 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
}
int ocfs2_write_begin_nolock(struct address_space *mapping,
- loff_t pos, unsigned len, ocfs2_write_type_t type,
- struct page **pagep, void **fsdata,
- struct buffer_head *di_bh, struct page *mmap_page)
+ loff_t pos, unsigned len, ocfs2_write_type_t type,
+ struct folio **foliop, void **fsdata,
+ struct buffer_head *di_bh, struct folio *mmap_folio)
{
int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0;
@@ -1667,7 +1646,7 @@ try_again:
if (ocfs2_supports_inline_data(osb)) {
ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len,
- mmap_page, wc);
+ mmap_folio, wc);
if (ret == 1) {
ret = 0;
goto success;
@@ -1719,7 +1698,7 @@ try_again:
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(long long)i_size_read(inode),
le32_to_cpu(di->i_clusters),
- pos, len, type, mmap_page,
+ pos, len, type, mmap_folio,
clusters_to_alloc, extents_to_split);
/*
@@ -1790,21 +1769,21 @@ try_again:
}
/*
- * Fill our page array first. That way we've grabbed enough so
+ * Fill our folio array first. That way we've grabbed enough so
* that we can zero and flush if we error after adding the
* extent.
*/
- ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
- cluster_of_pages, mmap_page);
+ ret = ocfs2_grab_folios_for_write(mapping, wc, wc->w_cpos, pos, len,
+ cluster_of_pages, mmap_folio);
if (ret) {
/*
- * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
- * the target page. In this case, we exit with no error and no target
- * page. This will trigger the caller, page_mkwrite(), to re-try
- * the operation.
+ * ocfs2_grab_folios_for_write() returns -EAGAIN if it
+ * could not lock the target folio. In this case, we exit
+ * with no error and no target folio. This will trigger
+ * the caller, page_mkwrite(), to re-try the operation.
*/
if (type == OCFS2_WRITE_MMAP && ret == -EAGAIN) {
- BUG_ON(wc->w_target_page);
+ BUG_ON(wc->w_target_folio);
ret = 0;
goto out_quota;
}
@@ -1826,8 +1805,8 @@ try_again:
ocfs2_free_alloc_context(meta_ac);
success:
- if (pagep)
- *pagep = wc->w_target_page;
+ if (foliop)
+ *foliop = wc->w_target_folio;
*fsdata = wc;
return 0;
out_quota:
@@ -1846,7 +1825,7 @@ out:
* to VM code.
*/
if (wc->w_target_locked)
- unlock_page(mmap_page);
+ folio_unlock(mmap_folio);
ocfs2_free_write_ctxt(inode, wc);
@@ -1879,7 +1858,7 @@ out:
static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
int ret;
struct buffer_head *di_bh = NULL;
@@ -1901,7 +1880,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
down_write(&OCFS2_I(inode)->ip_alloc_sem);
ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_BUFFER,
- pagep, fsdata, di_bh, NULL);
+ foliop, fsdata, di_bh, NULL);
if (ret) {
mlog_errno(ret);
goto out_fail;
@@ -1925,18 +1904,15 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
struct ocfs2_dinode *di,
struct ocfs2_write_ctxt *wc)
{
- void *kaddr;
-
if (unlikely(*copied < len)) {
- if (!PageUptodate(wc->w_target_page)) {
+ if (!folio_test_uptodate(wc->w_target_folio)) {
*copied = 0;
return;
}
}
- kaddr = kmap_atomic(wc->w_target_page);
- memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
- kunmap_atomic(kaddr);
+ memcpy_from_folio(di->id2.i_data.id_data + pos, wc->w_target_folio,
+ pos, *copied);
trace_ocfs2_write_end_inline(
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -1945,17 +1921,16 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
le16_to_cpu(di->i_dyn_features));
}
-int ocfs2_write_end_nolock(struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied, void *fsdata)
+int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos,
+ unsigned len, unsigned copied, void *fsdata)
{
int i, ret;
- unsigned from, to, start = pos & (PAGE_SIZE - 1);
+ size_t from, to, start = pos & (PAGE_SIZE - 1);
struct inode *inode = mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_write_ctxt *wc = fsdata;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
handle_t *handle = wc->w_handle;
- struct page *tmppage;
BUG_ON(!list_empty(&wc->w_unwritten_list));
@@ -1974,44 +1949,44 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
goto out_write_size;
}
- if (unlikely(copied < len) && wc->w_target_page) {
+ if (unlikely(copied < len) && wc->w_target_folio) {
loff_t new_isize;
- if (!PageUptodate(wc->w_target_page))
+ if (!folio_test_uptodate(wc->w_target_folio))
copied = 0;
new_isize = max_t(loff_t, i_size_read(inode), pos + copied);
- if (new_isize > page_offset(wc->w_target_page))
- ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
+ if (new_isize > folio_pos(wc->w_target_folio))
+ ocfs2_zero_new_buffers(wc->w_target_folio, start+copied,
start+len);
else {
/*
- * When page is fully beyond new isize (data copy
- * failed), do not bother zeroing the page. Invalidate
+ * When folio is fully beyond new isize (data copy
+ * failed), do not bother zeroing the folio. Invalidate
* it instead so that writeback does not get confused
* put page & buffer dirty bits into inconsistent
* state.
*/
- block_invalidate_folio(page_folio(wc->w_target_page),
- 0, PAGE_SIZE);
+ block_invalidate_folio(wc->w_target_folio, 0,
+ folio_size(wc->w_target_folio));
}
}
- if (wc->w_target_page)
- flush_dcache_page(wc->w_target_page);
+ if (wc->w_target_folio)
+ flush_dcache_folio(wc->w_target_folio);
- for(i = 0; i < wc->w_num_pages; i++) {
- tmppage = wc->w_pages[i];
+ for (i = 0; i < wc->w_num_folios; i++) {
+ struct folio *folio = wc->w_folios[i];
- /* This is the direct io target page. */
- if (tmppage == NULL)
+ /* This is the direct io target folio */
+ if (folio == NULL)
continue;
- if (tmppage == wc->w_target_page) {
+ if (folio == wc->w_target_folio) {
from = wc->w_target_from;
to = wc->w_target_to;
- BUG_ON(from > PAGE_SIZE ||
- to > PAGE_SIZE ||
+ BUG_ON(from > folio_size(folio) ||
+ to > folio_size(folio) ||
to < from);
} else {
/*
@@ -2020,19 +1995,17 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
* to flush their entire range.
*/
from = 0;
- to = PAGE_SIZE;
+ to = folio_size(folio);
}
- if (page_has_buffers(tmppage)) {
+ if (folio_buffers(folio)) {
if (handle && ocfs2_should_order_data(inode)) {
- loff_t start_byte =
- ((loff_t)tmppage->index << PAGE_SHIFT) +
- from;
+ loff_t start_byte = folio_pos(folio) + from;
loff_t length = to - from;
ocfs2_jbd2_inode_add_write(handle, inode,
start_byte, length);
}
- block_commit_write(tmppage, from, to);
+ block_commit_write(folio, from, to);
}
}
@@ -2061,7 +2034,7 @@ out:
* this lock and will ask for the page lock when flushing the data.
* put it here to preserve the unlock order.
*/
- ocfs2_unlock_pages(wc);
+ ocfs2_unlock_folios(wc);
if (handle)
ocfs2_commit_trans(osb, handle);
@@ -2076,7 +2049,7 @@ out:
static int ocfs2_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
int ret;
struct inode *inode = mapping->host;
@@ -2283,8 +2256,6 @@ unlock:
ocfs2_inode_unlock(inode, 1);
brelse(di_bh);
out:
- if (ret < 0)
- ret = -EIO;
return ret;
}
@@ -2368,6 +2339,11 @@ static int ocfs2_dio_end_io_write(struct inode *inode,
}
list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
+ ret = ocfs2_assure_trans_credits(handle, credits);
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
ret = ocfs2_mark_extent_written(inode, &et, handle,
ue->ue_cpos, 1,
ue->ue_phys,
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 3a520117fa59..114efc9111e4 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -8,16 +8,11 @@
#include <linux/fs.h>
-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
- struct page *page,
- unsigned from,
- unsigned to);
-
-int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
+int ocfs2_map_folio_blocks(struct folio *folio, u64 *p_blkno,
struct inode *inode, unsigned int from,
unsigned int to, int new);
-void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages);
+void ocfs2_unlock_and_free_folios(struct folio **folios, int num_folios);
int walk_page_buffers( handle_t *handle,
struct buffer_head *head,
@@ -37,11 +32,11 @@ typedef enum {
} ocfs2_write_type_t;
int ocfs2_write_begin_nolock(struct address_space *mapping,
- loff_t pos, unsigned len, ocfs2_write_type_t type,
- struct page **pagep, void **fsdata,
- struct buffer_head *di_bh, struct page *mmap_page);
+ loff_t pos, unsigned len, ocfs2_write_type_t type,
+ struct folio **foliop, void **fsdata,
+ struct buffer_head *di_bh, struct folio *mmap_folio);
-int ocfs2_read_inline_data(struct inode *inode, struct page *page,
+int ocfs2_read_inline_data(struct inode *inode, struct folio *folio,
struct buffer_head *di_bh);
int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
@@ -70,6 +65,8 @@ enum ocfs2_iocb_lock_bits {
OCFS2_IOCB_NUM_LOCKS
};
+#define ocfs2_iocb_init_rw_locked(iocb) \
+ (iocb->private = NULL)
#define ocfs2_iocb_clear_rw_locked(iocb) \
clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
#define ocfs2_iocb_rw_locked_level(iocb) \
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index cdb9b9bdea1f..8f714406528d 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -235,7 +235,6 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
if (bhs[i] == NULL) {
bhs[i] = sb_getblk(sb, block++);
if (bhs[i] == NULL) {
- ocfs2_metadata_cache_io_unlock(ci);
status = -ENOMEM;
mlog_errno(status);
/* Don't forget to put previous bh! */
@@ -389,7 +388,8 @@ read_failure:
/* Always set the buffer in the cache, even if it was
* a forced read, or read-ahead which hasn't yet
* completed. */
- ocfs2_set_buffer_uptodate(ci, bh);
+ if (bh)
+ ocfs2_set_buffer_uptodate(ci, bh);
}
ocfs2_metadata_cache_io_unlock(ci);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 1bde1281d514..724350925aff 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -3,6 +3,7 @@
* Copyright (C) 2004, 2005 Oracle. All rights reserved.
*/
+#include "linux/kstrtox.h"
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/jiffies.h>
@@ -1020,7 +1021,7 @@ fire_callbacks:
if (list_empty(&slot->ds_live_item))
goto out;
- /* live nodes only go dead after enough consequtive missed
+ /* live nodes only go dead after enough consecutive missed
* samples.. reset the missed counter whenever we see
* activity */
if (slot->ds_equal_samples >= o2hb_dead_threshold || gen_changed) {
@@ -1535,10 +1536,11 @@ static int o2hb_read_block_input(struct o2hb_region *reg,
{
unsigned long bytes;
char *p = (char *)page;
+ int ret;
- bytes = simple_strtoul(p, &p, 0);
- if (!p || (*p && (*p != '\n')))
- return -EINVAL;
+ ret = kstrtoul(p, 0, &bytes);
+ if (ret)
+ return ret;
/* Heartbeat and fs min / max block sizes are the same. */
if (bytes > 4096 || bytes < 512)
@@ -1622,13 +1624,14 @@ static ssize_t o2hb_region_blocks_store(struct config_item *item,
struct o2hb_region *reg = to_o2hb_region(item);
unsigned long tmp;
char *p = (char *)page;
+ int ret;
if (reg->hr_bdev_file)
return -EINVAL;
- tmp = simple_strtoul(p, &p, 0);
- if (!p || (*p && (*p != '\n')))
- return -EINVAL;
+ ret = kstrtoul(p, 0, &tmp);
+ if (ret)
+ return ret;
if (tmp > O2NM_MAX_NODES || tmp == 0)
return -ERANGE;
@@ -1765,42 +1768,41 @@ static ssize_t o2hb_region_dev_store(struct config_item *item,
long fd;
int sectsize;
char *p = (char *)page;
- struct fd f;
ssize_t ret = -EINVAL;
int live_threshold;
if (reg->hr_bdev_file)
- goto out;
+ return -EINVAL;
/* We can't heartbeat without having had our node number
* configured yet. */
if (o2nm_this_node() == O2NM_MAX_NODES)
- goto out;
+ return -EINVAL;
- fd = simple_strtol(p, &p, 0);
- if (!p || (*p && (*p != '\n')))
- goto out;
+ ret = kstrtol(p, 0, &fd);
+ if (ret < 0)
+ return -EINVAL;
if (fd < 0 || fd >= INT_MAX)
- goto out;
+ return -EINVAL;
- f = fdget(fd);
- if (f.file == NULL)
- goto out;
+ CLASS(fd, f)(fd);
+ if (fd_empty(f))
+ return -EINVAL;
if (reg->hr_blocks == 0 || reg->hr_start_block == 0 ||
reg->hr_block_bytes == 0)
- goto out2;
+ return -EINVAL;
- if (!S_ISBLK(f.file->f_mapping->host->i_mode))
- goto out2;
+ if (!S_ISBLK(fd_file(f)->f_mapping->host->i_mode))
+ return -EINVAL;
- reg->hr_bdev_file = bdev_file_open_by_dev(f.file->f_mapping->host->i_rdev,
+ reg->hr_bdev_file = bdev_file_open_by_dev(fd_file(f)->f_mapping->host->i_rdev,
BLK_OPEN_WRITE | BLK_OPEN_READ, NULL, NULL);
if (IS_ERR(reg->hr_bdev_file)) {
ret = PTR_ERR(reg->hr_bdev_file);
reg->hr_bdev_file = NULL;
- goto out2;
+ return ret;
}
sectsize = bdev_logical_block_size(reg_bdev(reg));
@@ -1906,9 +1908,6 @@ out3:
fput(reg->hr_bdev_file);
reg->hr_bdev_file = NULL;
}
-out2:
- fdput(f);
-out:
return ret;
}
@@ -2140,10 +2139,11 @@ static ssize_t o2hb_heartbeat_group_dead_threshold_store(struct config_item *ite
{
unsigned long tmp;
char *p = (char *)page;
+ int ret;
- tmp = simple_strtoul(p, &p, 10);
- if (!p || (*p && (*p != '\n')))
- return -EINVAL;
+ ret = kstrtoul(p, 10, &tmp);
+ if (ret)
+ return ret;
/* this will validate ranges for us. */
o2hb_dead_threshold_set((unsigned int) tmp);
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index b73fc42e46ff..630bd5a3dd0d 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -29,7 +29,7 @@
* just calling printk() so that this can eventually make its way through
* relayfs along with the debugging messages. Everything else gets KERN_DEBUG.
* The inline tests and macro dance give GCC the opportunity to quite cleverly
- * only emit the appropriage printk() when the caller passes in a constant
+ * only emit the appropriate printk() when the caller passes in a constant
* mask, as is almost always the case.
*
* All this bitmask nonsense is managed from the files under
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index 15d0ed9c13e5..bfb8b456876c 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -23,7 +23,7 @@
* race between when we see a node start heartbeating and when we connect
* to it.
*
- * So nodes that are in this transtion put a hold on the quorum decision
+ * So nodes that are in this transition put a hold on the quorum decision
* with a counter. As they fall out of this transition they drop the count
* and if they're the last, they fire off the decision.
*/
@@ -60,7 +60,7 @@ static void o2quo_fence_self(void)
switch (o2nm_single_cluster->cl_fence_method) {
case O2NM_FENCE_PANIC:
panic("*** ocfs2 is very sorry to be fencing this system by "
- "panicing ***\n");
+ "panicking ***\n");
break;
default:
WARN_ON(o2nm_single_cluster->cl_fence_method >=
@@ -189,7 +189,7 @@ static void o2quo_clear_hold(struct o2quo_state *qs, u8 node)
}
/* as a node comes up we delay the quorum decision until we know the fate of
- * the connection. the hold will be droped in conn_up or hb_down. it might be
+ * the connection. the hold will be dropped in conn_up or hb_down. it might be
* perpetuated by con_err until hb_down. if we already have a conn, we might
* be dropping a hold that conn_up got. */
void o2quo_hb_up(u8 node)
@@ -256,7 +256,7 @@ void o2quo_hb_still_up(u8 node)
}
/* This is analogous to hb_up. as a node's connection comes up we delay the
- * quorum decision until we see it heartbeating. the hold will be droped in
+ * quorum decision until we see it heartbeating. the hold will be dropped in
* hb_up or hb_down. it might be perpetuated by con_err until hb_down. if
* it's already heartbeating we might be dropping a hold that conn_up got.
* */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 960080753d3b..43e652a2adaf 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -5,13 +5,13 @@
*
* ----
*
- * Callers for this were originally written against a very simple synchronus
+ * Callers for this were originally written against a very simple synchronous
* API. This implementation reflects those simple callers. Some day I'm sure
* we'll need to move to a more robust posting/callback mechanism.
*
* Transmit calls pass in kernel virtual addresses and block copying this into
* the socket's tx buffers via a usual blocking sendmsg. They'll block waiting
- * for a failed socket to timeout. TX callers can also pass in a poniter to an
+ * for a failed socket to timeout. TX callers can also pass in a pointer to an
* 'int' which gets filled with an errno off the wire in response to the
* message they send.
*
@@ -101,7 +101,7 @@ static struct socket *o2net_listen_sock;
* o2net_wq. teardown detaches the callbacks before destroying the workqueue.
* quorum work is queued as sock containers are shutdown.. stop_listening
* tears down all the node's sock containers, preventing future shutdowns
- * and queued quroum work, before canceling delayed quorum work and
+ * and queued quorum work, before canceling delayed quorum work and
* destroying the work queue.
*/
static struct workqueue_struct *o2net_wq;
@@ -724,7 +724,7 @@ static void o2net_shutdown_sc(struct work_struct *work)
if (o2net_unregister_callbacks(sc->sc_sock->sk, sc)) {
/* we shouldn't flush as we're in the thread, the
* races with pending sc work structs are harmless */
- del_timer_sync(&sc->sc_idle_timeout);
+ timer_delete_sync(&sc->sc_idle_timeout);
o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
sc_put(sc);
kernel_sock_shutdown(sc->sc_sock, SHUT_RDWR);
@@ -1419,7 +1419,7 @@ out:
return ret;
}
-/* this work func is triggerd by data ready. it reads until it can read no
+/* this work func is triggered by data ready. it reads until it can read no
* more. it interprets 0, eof, as fatal. if data_ready hits while we're doing
* our work the work struct will be marked and we'll be called again. */
static void o2net_rx_until_empty(struct work_struct *work)
@@ -1483,7 +1483,7 @@ static void o2net_sc_send_keep_req(struct work_struct *work)
sc_put(sc);
}
-/* socket shutdown does a del_timer_sync against this as it tears down.
+/* socket shutdown does a timer_delete_sync against this as it tears down.
* we can't start this timer until we've got to the point in sc buildup
* where shutdown is going to be involved */
static void o2net_idle_timer(struct timer_list *t)
@@ -1784,6 +1784,9 @@ static int o2net_accept_one(struct socket *sock, int *more)
struct o2nm_node *node = NULL;
struct o2nm_node *local_node = NULL;
struct o2net_sock_container *sc = NULL;
+ struct proto_accept_arg arg = {
+ .flags = O_NONBLOCK,
+ };
struct o2net_node *nn;
unsigned int nofs_flag;
@@ -1802,7 +1805,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
new_sock->type = sock->type;
new_sock->ops = sock->ops;
- ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false);
+ ret = sock->ops->accept(sock, new_sock, &arg);
if (ret < 0)
goto out;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index a9b8688aaf30..1873bbbb7e5b 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -32,7 +32,8 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry)
}
-static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
+static int ocfs2_dentry_revalidate(struct inode *dir, const struct qstr *name,
+ struct dentry *dentry, unsigned int flags)
{
struct inode *inode;
int ret = 0; /* if all else fails, just return false */
@@ -44,8 +45,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
inode = d_inode(dentry);
osb = OCFS2_SB(dentry->d_sb);
- trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len,
- dentry->d_name.name);
+ trace_ocfs2_dentry_revalidate(dentry, name->len, name->name);
/* For a negative dentry -
* check the generation number of the parent and compare with the
@@ -53,12 +53,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags)
*/
if (inode == NULL) {
unsigned long gen = (unsigned long) dentry->d_fsdata;
- unsigned long pgen;
- spin_lock(&dentry->d_lock);
- pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen;
- spin_unlock(&dentry->d_lock);
- trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len,
- dentry->d_name.name,
+ unsigned long pgen = OCFS2_I(dir)->ip_dir_lock_gen;
+ trace_ocfs2_dentry_revalidate_negative(name->len, name->name,
pgen, gen);
if (gen != pgen)
goto bail;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index d620d4c53c6f..7799f4d16ce9 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -294,13 +294,16 @@ out:
* bh passed here can be an inode block or a dir data block, depending
* on the inode inline data flag.
*/
-static int ocfs2_check_dir_entry(struct inode * dir,
- struct ocfs2_dir_entry * de,
- struct buffer_head * bh,
+static int ocfs2_check_dir_entry(struct inode *dir,
+ struct ocfs2_dir_entry *de,
+ struct buffer_head *bh,
+ char *buf,
+ unsigned int size,
unsigned long offset)
{
const char *error_msg = NULL;
const int rlen = le16_to_cpu(de->rec_len);
+ const unsigned long next_offset = ((char *) de - buf) + rlen;
if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
@@ -308,9 +311,11 @@ static int ocfs2_check_dir_entry(struct inode * dir,
error_msg = "rec_len % 4 != 0";
else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
- else if (unlikely(
- ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
- error_msg = "directory entry across blocks";
+ else if (unlikely(next_offset > size))
+ error_msg = "directory entry overrun";
+ else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
+ next_offset != size)
+ error_msg = "directory entry too close to end";
if (unlikely(error_msg != NULL))
mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
@@ -352,16 +357,17 @@ static inline int ocfs2_search_dirblock(struct buffer_head *bh,
de_buf = first_de;
dlimit = de_buf + bytes;
- while (de_buf < dlimit) {
+ while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
/* this code is executed quadratically often */
/* do minimal checking `by hand' */
de = (struct ocfs2_dir_entry *) de_buf;
- if (de_buf + namelen <= dlimit &&
+ if (de->name + namelen <= dlimit &&
ocfs2_match(namelen, name, de)) {
/* found a match - just to be sure, do a full check */
- if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, first_de,
+ bytes, offset)) {
ret = -1;
goto bail;
}
@@ -1059,26 +1065,39 @@ int ocfs2_find_entry(const char *name, int namelen,
{
struct buffer_head *bh;
struct ocfs2_dir_entry *res_dir = NULL;
+ int ret = 0;
if (ocfs2_dir_indexed(dir))
return ocfs2_find_entry_dx(name, namelen, dir, lookup);
+ if (unlikely(i_size_read(dir) <= 0)) {
+ ret = -EFSCORRUPTED;
+ mlog_errno(ret);
+ goto out;
+ }
/*
* The unindexed dir code only uses part of the lookup
* structure, so there's no reason to push it down further
* than this.
*/
- if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
+ if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ if (unlikely(i_size_read(dir) > dir->i_sb->s_blocksize)) {
+ ret = -EFSCORRUPTED;
+ mlog_errno(ret);
+ goto out;
+ }
bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir);
- else
+ } else {
bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir);
+ }
if (bh == NULL)
return -ENOENT;
lookup->dl_leaf_bh = bh;
lookup->dl_entry = res_dir;
- return 0;
+out:
+ return ret;
}
/*
@@ -1138,7 +1157,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
pde = NULL;
de = (struct ocfs2_dir_entry *) first_de;
while (i < bytes) {
- if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
status = -EIO;
mlog_errno(status);
goto bail;
@@ -1635,7 +1654,8 @@ int __ocfs2_add_entry(handle_t *handle,
/* These checks should've already been passed by the
* prepare function, but I guess we can leave them
* here anyway. */
- if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start,
+ size, offset)) {
retval = -ENOENT;
goto bail;
}
@@ -1774,7 +1794,8 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
}
de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
- if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
+ if (!ocfs2_check_dir_entry(inode, de, di_bh, (char *)data->id_data,
+ i_size_read(inode), ctx->pos)) {
/* On error, skip the f_pos to the end. */
ctx->pos = i_size_read(inode);
break;
@@ -1867,7 +1888,8 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
while (ctx->pos < i_size_read(inode)
&& offset < sb->s_blocksize) {
de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
- if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
+ sb->s_blocksize, offset)) {
/* On error, skip the f_pos to the
next block. */
ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
@@ -1923,6 +1945,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
{
int error = 0;
struct inode *inode = file_inode(file);
+ struct ocfs2_file_private *fp = file->private_data;
int lock_level = 0;
trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -1943,7 +1966,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx)
goto bail_nolock;
}
- error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false);
+ error = ocfs2_dir_foreach_blk(inode, &fp->cookie, ctx, false);
ocfs2_inode_unlock(inode, lock_level);
if (error)
@@ -2000,6 +2023,7 @@ int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
*
* Return 0 if the name does not exist
* Return -EEXIST if the directory contains the name
+ * Return -EFSCORRUPTED if found corruption
*
* Callers should have i_rwsem + a cluster lock on dir
*/
@@ -2013,9 +2037,12 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
trace_ocfs2_check_dir_for_entry(
(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
- if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) {
+ ret = ocfs2_find_entry(name, namelen, dir, &lookup);
+ if (ret == 0) {
ret = -EEXIST;
mlog_errno(ret);
+ } else if (ret == -ENOENT) {
+ ret = 0;
}
ocfs2_free_dir_lookup_result(&lookup);
@@ -3339,7 +3366,7 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
struct super_block *sb = dir->i_sb;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_dir_entry *de, *last_de = NULL;
- char *de_buf, *limit;
+ char *first_de, *de_buf, *limit;
unsigned long offset = 0;
unsigned int rec_len, new_rec_len, free_space;
@@ -3352,14 +3379,16 @@ static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
else
free_space = dir->i_sb->s_blocksize - i_size_read(dir);
- de_buf = di->id2.i_data.id_data;
+ first_de = di->id2.i_data.id_data;
+ de_buf = first_de;
limit = de_buf + i_size_read(dir);
rec_len = OCFS2_DIR_REC_LEN(namelen);
while (de_buf < limit) {
de = (struct ocfs2_dir_entry *)de_buf;
- if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de,
+ i_size_read(dir), offset)) {
ret = -ENOENT;
goto out;
}
@@ -3441,7 +3470,8 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
/* move to next block */
de = (struct ocfs2_dir_entry *) bh->b_data;
}
- if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
+ if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize,
+ offset)) {
status = -ENOENT;
goto bail;
}
@@ -3499,16 +3529,6 @@ static int dx_leaf_sort_cmp(const void *a, const void *b)
return 0;
}
-static void dx_leaf_sort_swap(void *a, void *b, int size)
-{
- struct ocfs2_dx_entry *entry1 = a;
- struct ocfs2_dx_entry *entry2 = b;
-
- BUG_ON(size != sizeof(*entry1));
-
- swap(*entry1, *entry2);
-}
-
static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf)
{
struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
@@ -3769,7 +3789,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
*/
sort(dx_leaf->dl_list.de_entries, num_used,
sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
- dx_leaf_sort_swap);
+ NULL);
ocfs2_journal_dirty(handle, dx_leaf_bh);
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index bae60ca2672a..1969db8ffa9c 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -62,8 +62,6 @@ enum dlm_status {
DLM_MAXSTATS, /* 41: upper limit for return code validation */
};
-/* for pretty-printing dlm_status error messages */
-const char *dlm_errmsg(enum dlm_status err);
/* for pretty-printing dlm_status error names */
const char *dlm_errname(enum dlm_status err);
@@ -120,7 +118,7 @@ struct dlm_lockstatus {
#define LKM_VALBLK 0x00000100 /* lock value block request */
#define LKM_NOQUEUE 0x00000200 /* non blocking request */
#define LKM_CONVERT 0x00000400 /* conversion request */
-#define LKM_NODLCKWT 0x00000800 /* this lock wont deadlock (U) */
+#define LKM_NODLCKWT 0x00000800 /* this lock won't deadlock (U) */
#define LKM_UNLOCK 0x00001000 /* deallocate this lock */
#define LKM_CANCEL 0x00002000 /* cancel conversion request */
#define LKM_DEQALL 0x00004000 /* remove all locks held by proc (U) */
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index be5e9ed7da8d..fe4fdd09bae3 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -14,6 +14,7 @@
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/export.h>
+#include <linux/string_choices.h>
#include "../cluster/heartbeat.h"
#include "../cluster/nodemanager.h"
@@ -90,12 +91,12 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
buf, res->owner, res->state);
printk(" last used: %lu, refcnt: %u, on purge list: %s\n",
res->last_used, kref_read(&res->refs),
- list_empty(&res->purge) ? "no" : "yes");
+ str_no_yes(list_empty(&res->purge)));
printk(" on dirty list: %s, on reco list: %s, "
"migrating pending: %s\n",
- list_empty(&res->dirty) ? "no" : "yes",
- list_empty(&res->recovering) ? "no" : "yes",
- res->migration_pending ? "yes" : "no");
+ str_no_yes(list_empty(&res->dirty)),
+ str_no_yes(list_empty(&res->recovering)),
+ str_yes_no(res->migration_pending));
printk(" inflight locks: %d, asts reserved: %d\n",
res->inflight_locks, atomic_read(&res->asts_reserved));
dlm_print_lockres_refmap(res);
@@ -164,59 +165,6 @@ static const char *dlm_errnames[] = {
[DLM_MAXSTATS] = "DLM_MAXSTATS",
};
-static const char *dlm_errmsgs[] = {
- [DLM_NORMAL] = "request in progress",
- [DLM_GRANTED] = "request granted",
- [DLM_DENIED] = "request denied",
- [DLM_DENIED_NOLOCKS] = "request denied, out of system resources",
- [DLM_WORKING] = "async request in progress",
- [DLM_BLOCKED] = "lock request blocked",
- [DLM_BLOCKED_ORPHAN] = "lock request blocked by a orphan lock",
- [DLM_DENIED_GRACE_PERIOD] = "topological change in progress",
- [DLM_SYSERR] = "system error",
- [DLM_NOSUPPORT] = "unsupported",
- [DLM_CANCELGRANT] = "can't cancel convert: already granted",
- [DLM_IVLOCKID] = "bad lockid",
- [DLM_SYNC] = "synchronous request granted",
- [DLM_BADTYPE] = "bad resource type",
- [DLM_BADRESOURCE] = "bad resource handle",
- [DLM_MAXHANDLES] = "no more resource handles",
- [DLM_NOCLINFO] = "can't contact cluster manager",
- [DLM_NOLOCKMGR] = "can't contact lock manager",
- [DLM_NOPURGED] = "can't contact purge daemon",
- [DLM_BADARGS] = "bad api args",
- [DLM_VOID] = "no status",
- [DLM_NOTQUEUED] = "NOQUEUE was specified and request failed",
- [DLM_IVBUFLEN] = "invalid resource name length",
- [DLM_CVTUNGRANT] = "attempted to convert ungranted lock",
- [DLM_BADPARAM] = "invalid lock mode specified",
- [DLM_VALNOTVALID] = "value block has been invalidated",
- [DLM_REJECTED] = "request rejected, unrecognized client",
- [DLM_ABORT] = "blocked lock request cancelled",
- [DLM_CANCEL] = "conversion request cancelled",
- [DLM_IVRESHANDLE] = "invalid resource handle",
- [DLM_DEADLOCK] = "deadlock recovery refused this request",
- [DLM_DENIED_NOASTS] = "failed to allocate AST",
- [DLM_FORWARD] = "request must wait for primary's response",
- [DLM_TIMEOUT] = "timeout value for lock has expired",
- [DLM_IVGROUPID] = "invalid group specification",
- [DLM_VERS_CONFLICT] = "version conflicts prevent request handling",
- [DLM_BAD_DEVICE_PATH] = "Locks device does not exist or path wrong",
- [DLM_NO_DEVICE_PERMISSION] = "Client has insufficient perms for device",
- [DLM_NO_CONTROL_DEVICE] = "Cannot set options on opened device ",
- [DLM_RECOVERING] = "lock resource being recovered",
- [DLM_MIGRATING] = "lock resource being migrated",
- [DLM_MAXSTATS] = "invalid error number",
-};
-
-const char *dlm_errmsg(enum dlm_status err)
-{
- if (err >= DLM_MAXSTATS || err < 0)
- return dlm_errmsgs[DLM_MAXSTATS];
- return dlm_errmsgs[err];
-}
-EXPORT_SYMBOL_GPL(dlm_errmsg);
-
const char *dlm_errname(enum dlm_status err)
{
if (err >= DLM_MAXSTATS || err < 0)
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 5c04dde99981..2018501b2249 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -1274,7 +1274,7 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
{
struct dlm_query_nodeinfo *qn;
struct dlm_ctxt *dlm = NULL;
- int locked = 0, status = -EINVAL;
+ int status = -EINVAL;
qn = (struct dlm_query_nodeinfo *) msg->buf;
@@ -1290,12 +1290,11 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
}
spin_lock(&dlm->spinlock);
- locked = 1;
if (dlm->joining_node != qn->qn_nodenum) {
mlog(ML_ERROR, "Node %d queried nodes on domain %s but "
"joining node is %d\n", qn->qn_nodenum, qn->qn_domain,
dlm->joining_node);
- goto bail;
+ goto unlock;
}
/* Support for node query was added in 1.1 */
@@ -1305,14 +1304,14 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len,
"but active dlm protocol is %d.%d\n", qn->qn_nodenum,
qn->qn_domain, dlm->dlm_locking_proto.pv_major,
dlm->dlm_locking_proto.pv_minor);
- goto bail;
+ goto unlock;
}
status = dlm_match_nodes(dlm, qn);
+unlock:
+ spin_unlock(&dlm->spinlock);
bail:
- if (locked)
- spin_unlock(&dlm->spinlock);
spin_unlock(&dlm_domain_lock);
return status;
@@ -1528,7 +1527,6 @@ static void dlm_send_join_asserts(struct dlm_ctxt *dlm,
{
int status, node, live;
- status = 0;
node = -1;
while ((node = find_next_bit(node_map, O2NM_MAX_NODES,
node + 1)) < O2NM_MAX_NODES) {
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index d610da8e2f24..86bb1a03bcc1 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -21,7 +21,7 @@
#include <linux/inet.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
-
+#include <linux/string_choices.h>
#include "../cluster/heartbeat.h"
#include "../cluster/nodemanager.h"
@@ -2859,7 +2859,7 @@ static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
dlm_lockres_release_ast(dlm, res);
mlog(0, "about to wait on migration_wq, dirty=%s\n",
- res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no");
+ str_yes_no(res->state & DLM_LOCK_RES_DIRTY));
/* if the extra ref we just put was the final one, this
* will pass thru immediately. otherwise, we need to wait
* for the last ast to finish. */
@@ -2869,12 +2869,12 @@ again:
msecs_to_jiffies(1000));
if (ret < 0) {
mlog(0, "woken again: migrating? %s, dead? %s\n",
- res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no",
- test_bit(target, dlm->domain_map) ? "no":"yes");
+ str_yes_no(res->state & DLM_LOCK_RES_MIGRATING),
+ str_no_yes(test_bit(target, dlm->domain_map)));
} else {
mlog(0, "all is well: migrating? %s, dead? %s\n",
- res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no",
- test_bit(target, dlm->domain_map) ? "no":"yes");
+ str_yes_no(res->state & DLM_LOCK_RES_MIGRATING),
+ str_no_yes(test_bit(target, dlm->domain_map)));
}
if (!dlm_migration_can_proceed(dlm, res, target)) {
mlog(0, "trying again...\n");
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 50da8af988c1..67fc62a49a76 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -22,7 +22,7 @@
#include <linux/timer.h>
#include <linux/kthread.h>
#include <linux/delay.h>
-
+#include <linux/string_choices.h>
#include "../cluster/heartbeat.h"
#include "../cluster/nodemanager.h"
@@ -207,7 +207,7 @@ void dlm_complete_recovery_thread(struct dlm_ctxt *dlm)
* 1) all recovery threads cluster wide will work on recovering
* ONE node at a time
* 2) negotiate who will take over all the locks for the dead node.
- * thats right... ALL the locks.
+ * that's right... ALL the locks.
* 3) once a new master is chosen, everyone scans all locks
* and moves aside those mastered by the dead guy
* 4) each of these locks should be locked until recovery is done
@@ -581,8 +581,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
msecs_to_jiffies(1000));
mlog(0, "waited 1 sec for %u, "
"dead? %s\n", ndata->node_num,
- dlm_is_node_dead(dlm, ndata->node_num) ?
- "yes" : "no");
+ str_yes_no(dlm_is_node_dead(dlm, ndata->node_num)));
} else {
/* -ENOMEM on the other node */
mlog(0, "%s: node %u returned "
@@ -677,7 +676,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
spin_unlock(&dlm_reco_state_lock);
mlog(0, "pass #%d, all_nodes_done?: %s\n", ++pass,
- all_nodes_done?"yes":"no");
+ str_yes_no(all_nodes_done));
if (all_nodes_done) {
int ret;
@@ -1469,7 +1468,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
* The first one is handled at the end of this function. The
* other two are handled in the worker thread after locks have
* been attached. Yes, we don't wait for purge time to match
- * kref_init. The lockres will still have atleast one ref
+ * kref_init. The lockres will still have at least one ref
* added because it is in the hash __dlm_insert_lockres() */
extra_refs++;
@@ -1735,7 +1734,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
spin_unlock(&res->spinlock);
}
} else {
- /* put.. incase we are not the master */
+ /* put.. in case we are not the master */
spin_unlock(&res->spinlock);
dlm_lockres_put(res);
}
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 7fc0e920eda7..5130ec44e5e1 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -20,6 +20,7 @@
#include <linux/module.h>
#include <linux/fs.h>
+#include <linux/fs_context.h>
#include <linux/pagemap.h>
#include <linux/types.h>
#include <linux/slab.h>
@@ -401,10 +402,10 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
* File creation. Allocate an inode, and we're done..
*/
/* SMP-safe */
-static int dlmfs_mkdir(struct mnt_idmap * idmap,
- struct inode * dir,
- struct dentry * dentry,
- umode_t mode)
+static struct dentry *dlmfs_mkdir(struct mnt_idmap * idmap,
+ struct inode * dir,
+ struct dentry * dentry,
+ umode_t mode)
{
int status;
struct inode *inode = NULL;
@@ -447,7 +448,7 @@ static int dlmfs_mkdir(struct mnt_idmap * idmap,
bail:
if (status < 0)
iput(inode);
- return status;
+ return ERR_PTR(status);
}
static int dlmfs_create(struct mnt_idmap *idmap,
@@ -506,9 +507,7 @@ bail:
return status;
}
-static int dlmfs_fill_super(struct super_block * sb,
- void * data,
- int silent)
+static int dlmfs_fill_super(struct super_block *sb, struct fs_context *fc)
{
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
@@ -556,17 +555,27 @@ static const struct inode_operations dlmfs_file_inode_operations = {
.setattr = dlmfs_file_setattr,
};
-static struct dentry *dlmfs_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data)
+static int dlmfs_get_tree(struct fs_context *fc)
+{
+ return get_tree_nodev(fc, dlmfs_fill_super);
+}
+
+static const struct fs_context_operations dlmfs_context_ops = {
+ .get_tree = dlmfs_get_tree,
+};
+
+static int dlmfs_init_fs_context(struct fs_context *fc)
{
- return mount_nodev(fs_type, flags, data, dlmfs_fill_super);
+ fc->ops = &dlmfs_context_ops;
+
+ return 0;
}
static struct file_system_type dlmfs_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2_dlmfs",
- .mount = dlmfs_mount,
.kill_sb = kill_litter_super,
+ .init_fs_context = dlmfs_init_fs_context,
};
MODULE_ALIAS_FS("ocfs2_dlmfs");
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index cb40cafbc062..92a6149da9c1 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -19,6 +19,7 @@
#include <linux/delay.h>
#include <linux/quotaops.h>
#include <linux/sched/signal.h>
+#include <linux/string_choices.h>
#define MLOG_MASK_PREFIX ML_DLM_GLUE
#include <cluster/masklog.h>
@@ -221,12 +222,12 @@ struct ocfs2_lock_res_ops {
*/
#define LOCK_TYPE_USES_LVB 0x2
-static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
.get_osb = ocfs2_get_inode_osb,
.check_downconvert = ocfs2_check_meta_downconvert,
.set_lvb = ocfs2_set_meta_lvb,
@@ -234,50 +235,50 @@ static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_super_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_super_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH,
};
-static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_rename_lops = {
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.get_osb = ocfs2_get_dentry_osb,
.post_unlock = ocfs2_dentry_post_unlock,
.downconvert_worker = ocfs2_dentry_convert_worker,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_flock_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_flock_lops = {
.get_osb = ocfs2_get_file_osb,
.flags = 0,
};
-static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
.set_lvb = ocfs2_set_qinfo_lvb,
.get_osb = ocfs2_get_qinfo_osb,
.flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
};
-static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
+static const struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
.check_downconvert = ocfs2_check_refcount_downconvert,
.downconvert_worker = ocfs2_refcount_convert_worker,
.flags = 0,
@@ -510,7 +511,7 @@ static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw)
static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
- struct ocfs2_lock_res_ops *ops,
+ const struct ocfs2_lock_res_ops *ops,
void *priv)
{
res->l_type = type;
@@ -553,7 +554,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
unsigned int generation,
struct inode *inode)
{
- struct ocfs2_lock_res_ops *ops;
+ const struct ocfs2_lock_res_ops *ops;
switch(type) {
case OCFS2_LOCK_TYPE_RW:
@@ -794,7 +795,7 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
/*
* Keep a list of processes who have interest in a lockres.
- * Note: this is now only uesed for check recursive cluster locking.
+ * Note: this is now only used for check recursive cluster locking.
*/
static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres,
struct ocfs2_lock_holder *oh)
@@ -2529,30 +2530,28 @@ bail:
/*
* This is working around a lock inversion between tasks acquiring DLM
- * locks while holding a page lock and the downconvert thread which
- * blocks dlm lock acquiry while acquiring page locks.
+ * locks while holding a folio lock and the downconvert thread which
+ * blocks dlm lock acquiry while acquiring folio locks.
*
- * ** These _with_page variantes are only intended to be called from aop
- * methods that hold page locks and return a very specific *positive* error
+ * ** These _with_folio variants are only intended to be called from aop
+ * methods that hold folio locks and return a very specific *positive* error
* code that aop methods pass up to the VFS -- test for errors with != 0. **
*
* The DLM is called such that it returns -EAGAIN if it would have
* blocked waiting for the downconvert thread. In that case we unlock
- * our page so the downconvert thread can make progress. Once we've
+ * our folio so the downconvert thread can make progress. Once we've
* done this we have to return AOP_TRUNCATED_PAGE so the aop method
* that called us can bubble that back up into the VFS who will then
* immediately retry the aop call.
*/
-int ocfs2_inode_lock_with_page(struct inode *inode,
- struct buffer_head **ret_bh,
- int ex,
- struct page *page)
+int ocfs2_inode_lock_with_folio(struct inode *inode,
+ struct buffer_head **ret_bh, int ex, struct folio *folio)
{
int ret;
ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK);
if (ret == -EAGAIN) {
- unlock_page(page);
+ folio_unlock(folio);
/*
* If we can't get inode lock immediately, we should not return
* directly here, since this will lead to a softlockup problem.
@@ -2630,7 +2629,7 @@ void ocfs2_inode_unlock(struct inode *inode,
}
/*
- * This _tracker variantes are introduced to deal with the recursive cluster
+ * This _tracker variants are introduced to deal with the recursive cluster
* locking issue. The idea is to keep track of a lock holder on the stack of
* the current process. If there's a lock holder on the stack, we know the
* task context is already protected by cluster locking. Currently, they're
@@ -2735,7 +2734,7 @@ void ocfs2_inode_unlock_tracker(struct inode *inode,
struct ocfs2_lock_res *lockres;
lockres = &OCFS2_I(inode)->ip_inode_lockres;
- /* had_lock means that the currect process already takes the cluster
+ /* had_lock means that the current process already takes the cluster
* lock previously.
* If had_lock is 1, we have nothing to do here.
* If had_lock is 0, we will release the lock.
@@ -3110,6 +3109,7 @@ static void *ocfs2_dlm_seq_next(struct seq_file *m, void *v, loff_t *pos)
struct ocfs2_lock_res *iter = v;
struct ocfs2_lock_res *dummy = &priv->p_iter_res;
+ (*pos)++;
spin_lock(&ocfs2_dlm_tracking_lock);
iter = ocfs2_dlm_next_res(iter, priv);
list_del_init(&dummy->l_debug_list);
@@ -3151,11 +3151,8 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v)
#ifdef CONFIG_OCFS2_FS_STATS
if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) {
now = ktime_to_us(ktime_get_real());
- if (lockres->l_lock_prmode.ls_last >
- lockres->l_lock_exmode.ls_last)
- last = lockres->l_lock_prmode.ls_last;
- else
- last = lockres->l_lock_exmode.ls_last;
+ last = max(lockres->l_lock_prmode.ls_last,
+ lockres->l_lock_exmode.ls_last);
/*
* Use d_filter_secs field to filter lock resources dump,
* the default d_filter_secs(0) value filters nothing,
@@ -3804,9 +3801,9 @@ recheck:
* set when the ast is received for an upconvert just before the
* OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
* on the heels of the ast, we want to delay the downconvert just
- * enough to allow the up requestor to do its task. Because this
+ * enough to allow the up requester to do its task. Because this
* lock is in the blocked queue, the lock will be downconverted
- * as soon as the requestor is done with the lock.
+ * as soon as the requester is done with the lock.
*/
if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
goto leave_requeue;
@@ -4341,7 +4338,7 @@ unqueue:
ocfs2_schedule_blocked_lock(osb, lockres);
mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name,
- ctl.requeue ? "yes" : "no");
+ str_yes_no(ctl.requeue));
spin_unlock_irqrestore(&lockres->l_lock, flags);
if (ctl.unblock_action != UNBLOCK_CONTINUE
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index e5da5809ed95..a3ebd7303ea2 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -137,10 +137,8 @@ int ocfs2_inode_lock_full_nested(struct inode *inode,
int ex,
int arg_flags,
int subclass);
-int ocfs2_inode_lock_with_page(struct inode *inode,
- struct buffer_head **ret_bh,
- int ex,
- struct page *page);
+int ocfs2_inode_lock_with_folio(struct inode *inode,
+ struct buffer_head **ret_bh, int ex, struct folio *folio);
/* Variants without special locking class or flags */
#define ocfs2_inode_lock_full(i, r, e, f)\
ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL)
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index b8b6a191b5cb..b95724b767e1 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -255,9 +255,9 @@ static struct dentry *ocfs2_fh_to_dentry(struct super_block *sb,
if (fh_len < 3 || fh_type > 2)
return NULL;
- handle.ih_blkno = (u64)le32_to_cpu(fid->raw[0]) << 32;
- handle.ih_blkno |= (u64)le32_to_cpu(fid->raw[1]);
- handle.ih_generation = le32_to_cpu(fid->raw[2]);
+ handle.ih_blkno = (u64)le32_to_cpu((__force __le32)fid->raw[0]) << 32;
+ handle.ih_blkno |= (u64)le32_to_cpu((__force __le32)fid->raw[1]);
+ handle.ih_generation = le32_to_cpu((__force __le32)fid->raw[2]);
return ocfs2_get_dentry(sb, &handle);
}
@@ -269,9 +269,9 @@ static struct dentry *ocfs2_fh_to_parent(struct super_block *sb,
if (fh_type != 2 || fh_len < 6)
return NULL;
- parent.ih_blkno = (u64)le32_to_cpu(fid->raw[3]) << 32;
- parent.ih_blkno |= (u64)le32_to_cpu(fid->raw[4]);
- parent.ih_generation = le32_to_cpu(fid->raw[5]);
+ parent.ih_blkno = (u64)le32_to_cpu((__force __le32)fid->raw[3]) << 32;
+ parent.ih_blkno |= (u64)le32_to_cpu((__force __le32)fid->raw[4]);
+ parent.ih_generation = le32_to_cpu((__force __le32)fid->raw[5]);
return ocfs2_get_dentry(sb, &parent);
}
@@ -280,5 +280,4 @@ const struct export_operations ocfs2_export_ops = {
.fh_to_dentry = ocfs2_fh_to_dentry,
.fh_to_parent = ocfs2_fh_to_parent,
.get_parent = ocfs2_get_parent,
- .flags = EXPORT_OP_ASYNC_LOCK,
};
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 70a768b623cf..930150ed5db1 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -435,6 +435,16 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
}
}
+ if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) {
+ ocfs2_error(inode->i_sb,
+ "Inode %lu has an invalid extent (next_free_rec %u, count %u)\n",
+ inode->i_ino,
+ le16_to_cpu(el->l_next_free_rec),
+ le16_to_cpu(el->l_count));
+ ret = -EROFS;
+ goto out;
+ }
+
i = ocfs2_search_extent_list(el, v_cluster);
if (i == -1) {
/*
@@ -973,7 +983,13 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
}
while (done < nr) {
- down_read(&OCFS2_I(inode)->ip_alloc_sem);
+ if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) {
+ rc = -EAGAIN;
+ mlog(ML_ERROR,
+ "Inode #%llu ip_alloc_sem is temporarily unavailable\n",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ break;
+ }
rc = ocfs2_extent_map_get_blocks(inode, v_block + done,
&p_block, &p_count, NULL);
up_read(&OCFS2_I(inode)->ip_alloc_sem);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0da8e7bd3261..2056cf08ac1e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -755,7 +755,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
u64 abs_to, struct buffer_head *di_bh)
{
struct address_space *mapping = inode->i_mapping;
- struct page *page;
+ struct folio *folio;
unsigned long index = abs_from >> PAGE_SHIFT;
handle_t *handle;
int ret = 0;
@@ -774,18 +774,19 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
goto out;
}
- page = find_or_create_page(mapping, index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
+ folio = __filemap_get_folio(mapping, index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
mlog_errno(ret);
goto out_commit_trans;
}
- /* Get the offsets within the page that we want to zero */
- zero_from = abs_from & (PAGE_SIZE - 1);
- zero_to = abs_to & (PAGE_SIZE - 1);
+ /* Get the offsets within the folio that we want to zero */
+ zero_from = offset_in_folio(folio, abs_from);
+ zero_to = offset_in_folio(folio, abs_to);
if (!zero_to)
- zero_to = PAGE_SIZE;
+ zero_to = folio_size(folio);
trace_ocfs2_write_zero_page(
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -803,7 +804,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
* __block_write_begin and block_commit_write to zero the
* whole block.
*/
- ret = __block_write_begin(page, block_start + 1, 0,
+ ret = __block_write_begin(folio, block_start + 1, 0,
ocfs2_get_block);
if (ret < 0) {
mlog_errno(ret);
@@ -812,7 +813,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
/* must not update i_size! */
- block_commit_write(page, block_start + 1, block_start + 1);
+ block_commit_write(folio, block_start + 1, block_start + 1);
}
/*
@@ -833,8 +834,8 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
}
out_unlock:
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
out_commit_trans:
if (handle)
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
@@ -1128,9 +1129,12 @@ int ocfs2_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
trace_ocfs2_setattr(inode, dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
dentry->d_name.len, dentry->d_name.name,
- attr->ia_valid, attr->ia_mode,
- from_kuid(&init_user_ns, attr->ia_uid),
- from_kgid(&init_user_ns, attr->ia_gid));
+ attr->ia_valid,
+ attr->ia_valid & ATTR_MODE ? attr->ia_mode : 0,
+ attr->ia_valid & ATTR_UID ?
+ from_kuid(&init_user_ns, attr->ia_uid) : 0,
+ attr->ia_valid & ATTR_GID ?
+ from_kgid(&init_user_ns, attr->ia_gid) : 0);
/* ensuring we don't even attempt to truncate a symlink */
if (S_ISLNK(inode->i_mode))
@@ -1783,6 +1787,14 @@ int ocfs2_remove_inode_range(struct inode *inode,
return 0;
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di);
+
+ if (byte_start > id_count || byte_start + byte_len > id_count) {
+ ret = -EINVAL;
+ mlog_errno(ret);
+ goto out;
+ }
+
ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
byte_start + byte_len, 0);
if (ret) {
@@ -1936,6 +1948,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
inode_lock(inode);
+ /* Wait all existing dio workers, newcomers will block on i_rwsem */
+ inode_dio_wait(inode);
/*
* This prevents concurrent writes on other nodes
*/
@@ -2384,6 +2398,8 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
} else
inode_lock(inode);
+ ocfs2_iocb_init_rw_locked(iocb);
+
/*
* Concurrent O_DIRECT writes are allowed with
* mount_option "coherency=buffered".
@@ -2530,6 +2546,8 @@ static ssize_t ocfs2_file_read_iter(struct kiocb *iocb,
if (!direct_io && nowait)
return -EOPNOTSUPP;
+ ocfs2_iocb_init_rw_locked(iocb);
+
/*
* buffered reads protect themselves in ->read_folio(). O_DIRECT reads
* need locks to protect pending reads from racing with truncate.
@@ -2748,6 +2766,13 @@ out_unlock:
return remapped > 0 ? remapped : ret;
}
+static loff_t ocfs2_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct ocfs2_file_private *fp = file->private_data;
+
+ return generic_llseek_cookie(file, offset, whence, &fp->cookie);
+}
+
const struct inode_operations ocfs2_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
@@ -2791,11 +2816,12 @@ const struct file_operations ocfs2_fops = {
.splice_write = iter_file_splice_write,
.fallocate = ocfs2_fallocate,
.remap_file_range = ocfs2_remap_file_range,
+ .fop_flags = FOP_ASYNC_LOCK,
};
WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
const struct file_operations ocfs2_dops = {
- .llseek = generic_file_llseek,
+ .llseek = ocfs2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
@@ -2807,6 +2833,7 @@ const struct file_operations ocfs2_dops = {
#endif
.lock = ocfs2_lock,
.flock = ocfs2_flock,
+ .fop_flags = FOP_ASYNC_LOCK,
};
/*
@@ -2841,7 +2868,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
};
const struct file_operations ocfs2_dops_no_plocks = {
- .llseek = generic_file_llseek,
+ .llseek = ocfs2_dir_llseek,
.read = generic_read_dir,
.iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 8e53e4ac1120..41e65e45a9f3 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -20,6 +20,7 @@ struct ocfs2_alloc_context;
enum ocfs2_alloc_restarted;
struct ocfs2_file_private {
+ u64 cookie;
struct file *fp_file;
struct mutex fp_mutex;
struct ocfs2_lock_res fp_flock;
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
index 1ad7106741f8..3ad7baf67658 100644
--- a/fs/ocfs2/filecheck.c
+++ b/fs/ocfs2/filecheck.c
@@ -505,5 +505,5 @@ static ssize_t ocfs2_filecheck_attr_store(struct kobject *kobj,
ocfs2_filecheck_handle_entry(ent, entry);
exit:
- return (!ret ? count : ret);
+ return ret ?: count;
}
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 999111bfc271..12e5d1f73325 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -200,6 +200,20 @@ bail:
return inode;
}
+static int ocfs2_dinode_has_extents(struct ocfs2_dinode *di)
+{
+ /* inodes flagged with other stuff in id2 */
+ if (di->i_flags & (OCFS2_SUPER_BLOCK_FL | OCFS2_LOCAL_ALLOC_FL |
+ OCFS2_CHAIN_FL | OCFS2_DEALLOC_FL))
+ return 0;
+ /* i_flags doesn't indicate when id2 is a fast symlink */
+ if (S_ISLNK(di->i_mode) && di->i_size && di->i_clusters == 0)
+ return 0;
+ if (di->i_dyn_features & OCFS2_INLINE_DATA_FL)
+ return 0;
+
+ return 1;
+}
/*
* here's how inodes get read from disk:
@@ -1122,7 +1136,7 @@ static void ocfs2_clear_inode(struct inode *inode)
dquot_drop(inode);
- /* To preven remote deletes we hold open lock before, now it
+ /* To prevent remote deletes we hold open lock before, now it
* is time to unlock PR and EX open locks. */
ocfs2_open_unlock(inode);
@@ -1437,7 +1451,7 @@ static int ocfs2_filecheck_validate_inode_block(struct super_block *sb,
* Call ocfs2_validate_meta_ecc() first since it has ecc repair
* function, but we should not return error immediately when ecc
* validation fails, because the reason is quite likely the invalid
- * inode number inputed.
+ * inode number inputted.
*/
rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
if (rc) {
@@ -1547,6 +1561,16 @@ static int ocfs2_filecheck_repair_inode_block(struct super_block *sb,
le32_to_cpu(di->i_fs_generation));
}
+ if (ocfs2_dinode_has_extents(di) &&
+ le16_to_cpu(di->id2.i_list.l_next_free_rec) > le16_to_cpu(di->id2.i_list.l_count)) {
+ di->id2.i_list.l_next_free_rec = di->id2.i_list.l_count;
+ changed = 1;
+ mlog(ML_ERROR,
+ "Filecheck: reset dinode #%llu: l_next_free_rec to %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le16_to_cpu(di->id2.i_list.l_next_free_rec));
+ }
+
if (changed || ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check)) {
ocfs2_compute_meta_ecc(sb, bh->b_data, &di->i_check);
mark_buffer_dirty(bh);
@@ -1621,6 +1645,7 @@ static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info
}
static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
+__acquires(&oi->ip_lock)
{
struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
@@ -1628,6 +1653,7 @@ static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
}
static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci)
+__releases(&oi->ip_lock)
{
struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index b1550ba73f96..7ae96fb8807a 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -125,6 +125,7 @@ int ocfs2_fileattr_set(struct mnt_idmap *idmap,
ocfs2_inode->ip_attr = flags;
ocfs2_set_inode_flags(inode);
+ inode_set_ctime_current(inode);
status = ocfs2_mark_inode_dirty(handle, inode, bh);
if (status < 0)
@@ -795,7 +796,7 @@ bail:
/*
* OCFS2_IOC_INFO handles an array of requests passed from userspace.
*
- * ocfs2_info_handle() recevies a large info aggregation, grab and
+ * ocfs2_info_handle() receives a large info aggregation, grab and
* validate the request count from header, then break it into small
* pieces, later specific handlers can handle them one by one.
*
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 604fea3a26ff..e5f58ff2175f 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
struct ocfs2_recovery_map *rm;
mutex_init(&osb->recovery_lock);
- osb->disable_recovery = 0;
+ osb->recovery_state = OCFS2_REC_ENABLED;
osb->recovery_thread_task = NULL;
init_waitqueue_head(&osb->recovery_event);
@@ -190,31 +190,53 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
return 0;
}
-/* we can't grab the goofy sem lock from inside wait_event, so we use
- * memory barriers to make sure that we'll see the null task before
- * being woken up */
static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
{
- mb();
return osb->recovery_thread_task != NULL;
}
-void ocfs2_recovery_exit(struct ocfs2_super *osb)
+static void ocfs2_recovery_disable(struct ocfs2_super *osb,
+ enum ocfs2_recovery_state state)
{
- struct ocfs2_recovery_map *rm;
-
- /* disable any new recovery threads and wait for any currently
- * running ones to exit. Do this before setting the vol_state. */
mutex_lock(&osb->recovery_lock);
- osb->disable_recovery = 1;
+ /*
+ * If recovery thread is not running, we can directly transition to
+ * final state.
+ */
+ if (!ocfs2_recovery_thread_running(osb)) {
+ osb->recovery_state = state + 1;
+ goto out_lock;
+ }
+ osb->recovery_state = state;
+ /* Wait for recovery thread to acknowledge state transition */
+ wait_event_cmd(osb->recovery_event,
+ !ocfs2_recovery_thread_running(osb) ||
+ osb->recovery_state >= state + 1,
+ mutex_unlock(&osb->recovery_lock),
+ mutex_lock(&osb->recovery_lock));
+out_lock:
mutex_unlock(&osb->recovery_lock);
- wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
- /* At this point, we know that no more recovery threads can be
- * launched, so wait for any recovery completion work to
- * complete. */
+ /*
+ * At this point we know that no more recovery work can be queued so
+ * wait for any recovery completion work to complete.
+ */
if (osb->ocfs2_wq)
flush_workqueue(osb->ocfs2_wq);
+}
+
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb)
+{
+ ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE);
+}
+
+void ocfs2_recovery_exit(struct ocfs2_super *osb)
+{
+ struct ocfs2_recovery_map *rm;
+
+ /* disable any new recovery threads and wait for any currently
+ * running ones to exit. Do this before setting the vol_state. */
+ ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE);
/*
* Now that recovery is shut down, and the osb is about to be
@@ -446,6 +468,23 @@ bail:
}
/*
+ * Make sure handle has at least 'nblocks' credits available. If it does not
+ * have that many credits available, we will try to extend the handle to have
+ * enough credits. If that fails, we will restart transaction to have enough
+ * credits. Similar notes regarding data consistency and locking implications
+ * as for ocfs2_extend_trans() apply here.
+ */
+int ocfs2_assure_trans_credits(handle_t *handle, int nblocks)
+{
+ int old_nblks = jbd2_handle_buffer_credits(handle);
+
+ trace_ocfs2_assure_trans_credits(old_nblks);
+ if (old_nblks >= nblocks)
+ return 0;
+ return ocfs2_extend_trans(handle, nblocks - old_nblks);
+}
+
+/*
* If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA.
* If that fails, restart the transaction & regain write access for the
* buffer head which is used for metadata modifications.
@@ -479,12 +518,6 @@ bail:
return status;
}
-
-struct ocfs2_triggers {
- struct jbd2_buffer_trigger_type ot_triggers;
- int ot_offset;
-};
-
static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
{
return container_of(triggers, struct ocfs2_triggers, ot_triggers);
@@ -548,85 +581,76 @@ static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh)
{
+ struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
+
mlog(ML_ERROR,
"ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
"bh->b_blocknr = %llu\n",
(unsigned long)bh,
(unsigned long long)bh->b_blocknr);
- ocfs2_error(bh->b_assoc_map->host->i_sb,
+ ocfs2_error(ot->sb,
"JBD2 has aborted our journal, ocfs2 cannot continue\n");
}
-static struct ocfs2_triggers di_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dinode, i_check),
-};
-
-static struct ocfs2_triggers eb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
-};
-
-static struct ocfs2_triggers rb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
-};
-
-static struct ocfs2_triggers gd_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
-};
-
-static struct ocfs2_triggers db_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_db_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
-};
+static void ocfs2_setup_csum_triggers(struct super_block *sb,
+ enum ocfs2_journal_trigger_type type,
+ struct ocfs2_triggers *ot)
+{
+ BUG_ON(type >= OCFS2_JOURNAL_TRIGGER_COUNT);
-static struct ocfs2_triggers xb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
-};
+ switch (type) {
+ case OCFS2_JTR_DI:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dinode, i_check);
+ break;
+ case OCFS2_JTR_EB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_extent_block, h_check);
+ break;
+ case OCFS2_JTR_RB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_refcount_block, rf_check);
+ break;
+ case OCFS2_JTR_GD:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_group_desc, bg_check);
+ break;
+ case OCFS2_JTR_DB:
+ ot->ot_triggers.t_frozen = ocfs2_db_frozen_trigger;
+ break;
+ case OCFS2_JTR_XB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_xattr_block, xb_check);
+ break;
+ case OCFS2_JTR_DQ:
+ ot->ot_triggers.t_frozen = ocfs2_dq_frozen_trigger;
+ break;
+ case OCFS2_JTR_DR:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check);
+ break;
+ case OCFS2_JTR_DL:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check);
+ break;
+ case OCFS2_JTR_NONE:
+ /* To make compiler happy... */
+ return;
+ }
-static struct ocfs2_triggers dq_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_dq_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
-};
+ ot->ot_triggers.t_abort = ocfs2_abort_trigger;
+ ot->sb = sb;
+}
-static struct ocfs2_triggers dr_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
-};
+void ocfs2_initialize_journal_triggers(struct super_block *sb,
+ struct ocfs2_triggers triggers[])
+{
+ enum ocfs2_journal_trigger_type type;
-static struct ocfs2_triggers dl_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
-};
+ for (type = OCFS2_JTR_DI; type < OCFS2_JOURNAL_TRIGGER_COUNT; type++)
+ ocfs2_setup_csum_triggers(sb, type, &triggers[type]);
+}
static int __ocfs2_journal_access(handle_t *handle,
struct ocfs2_caching_info *ci,
@@ -708,56 +732,91 @@ static int __ocfs2_journal_access(handle_t *handle,
int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DI],
+ type);
}
int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_EB],
+ type);
}
int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_RB],
type);
}
int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_GD],
+ type);
}
int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DB],
+ type);
}
int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_XB],
+ type);
}
int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DQ],
+ type);
}
int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DR],
+ type);
}
int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DL],
+ type);
}
int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
@@ -778,13 +837,15 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
if (!is_handle_aborted(handle)) {
journal_t *journal = handle->h_transaction->t_journal;
- mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
- "Aborting transaction and journal.\n");
+ mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed: "
+ "handle type %u started at line %u, credits %u/%u "
+ "errcode %d. Aborting transaction and journal.\n",
+ handle->h_type, handle->h_line_no,
+ handle->h_requested_credits,
+ jbd2_handle_buffer_credits(handle), status);
handle->h_err = status;
jbd2_journal_abort_handle(handle);
jbd2_journal_abort(journal, status);
- ocfs2_abort(bh->b_assoc_map->host->i_sb,
- "Journal already aborted.\n");
}
}
}
@@ -1016,7 +1077,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
if (!igrab(inode))
BUG();
- num_running_trans = atomic_read(&(osb->journal->j_num_trans));
+ num_running_trans = atomic_read(&(journal->j_num_trans));
trace_ocfs2_journal_shutdown(num_running_trans);
/* Do a commit_cache here. It will flush our journal, *and*
@@ -1035,9 +1096,10 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
osb->commit_task = NULL;
}
- BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
+ BUG_ON(atomic_read(&(journal->j_num_trans)) != 0);
- if (ocfs2_mount_local(osb)) {
+ if (ocfs2_mount_local(osb) &&
+ (journal->j_journal->j_flags & JBD2_LOADED)) {
jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal, 0);
jbd2_journal_unlock_updates(journal->j_journal);
@@ -1209,7 +1271,7 @@ static int ocfs2_force_read_journal(struct inode *inode)
}
for (i = 0; i < p_blocks; i++, p_blkno++) {
- bh = __find_get_block(osb->sb->s_bdev, p_blkno,
+ bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno,
osb->sb->s_blocksize);
/* block not cached. */
if (!bh)
@@ -1432,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg)
}
}
restart:
+ if (quota_enabled) {
+ mutex_lock(&osb->recovery_lock);
+ /* Confirm that recovery thread will no longer recover quotas */
+ if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) {
+ osb->recovery_state = OCFS2_REC_QUOTA_DISABLED;
+ wake_up(&osb->recovery_event);
+ }
+ if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED)
+ quota_enabled = 0;
+ mutex_unlock(&osb->recovery_lock);
+ }
+
status = ocfs2_super_lock(osb, 1);
if (status < 0) {
mlog_errno(status);
@@ -1529,27 +1603,29 @@ bail:
ocfs2_free_replay_slots(osb);
osb->recovery_thread_task = NULL;
- mb(); /* sync with ocfs2_recovery_thread_running */
+ if (osb->recovery_state == OCFS2_REC_WANT_DISABLE)
+ osb->recovery_state = OCFS2_REC_DISABLED;
wake_up(&osb->recovery_event);
mutex_unlock(&osb->recovery_lock);
- if (quota_enabled)
- kfree(rm_quota);
+ kfree(rm_quota);
return status;
}
void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
{
+ int was_set = -1;
+
mutex_lock(&osb->recovery_lock);
+ if (osb->recovery_state < OCFS2_REC_WANT_DISABLE)
+ was_set = ocfs2_recovery_map_set(osb, node_num);
trace_ocfs2_recovery_thread(node_num, osb->node_num,
- osb->disable_recovery, osb->recovery_thread_task,
- osb->disable_recovery ?
- -1 : ocfs2_recovery_map_set(osb, node_num));
+ osb->recovery_state, osb->recovery_thread_task, was_set);
- if (osb->disable_recovery)
+ if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE)
goto out;
if (osb->recovery_thread_task)
@@ -1916,7 +1992,7 @@ bail:
/*
* Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some
- * randomness to the timeout to minimize multple nodes firing the timer at the
+ * randomness to the timeout to minimize multiple nodes firing the timer at the
* same time.
*/
static inline unsigned long ocfs2_orphan_scan_timeout(void)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 41c9fe7e62f9..6397170f302f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
int ocfs2_recovery_init(struct ocfs2_super *osb);
void ocfs2_recovery_exit(struct ocfs2_super *osb);
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb);
int ocfs2_compute_replay_slots(struct ocfs2_super *osb);
void ocfs2_free_replay_slots(struct ocfs2_super *osb);
@@ -243,6 +244,8 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb,
int ocfs2_commit_trans(struct ocfs2_super *osb,
handle_t *handle);
int ocfs2_extend_trans(handle_t *handle, int nblocks);
+int ocfs2_assure_trans_credits(handle_t *handle,
+ int nblocks);
int ocfs2_allocate_extend_trans(handle_t *handle,
int thresh);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index c803c10dd97e..d1aa04a5af1b 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -212,14 +212,15 @@ static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
unsigned int num_clusters)
{
- spin_lock(&osb->osb_lock);
- if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
- osb->local_alloc_state == OCFS2_LA_THROTTLED)
- if (num_clusters >= osb->local_alloc_default_bits) {
+ if (num_clusters >= osb->local_alloc_default_bits) {
+ spin_lock(&osb->osb_lock);
+ if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
+ osb->local_alloc_state == OCFS2_LA_THROTTLED) {
cancel_delayed_work(&osb->la_enable_wq);
osb->local_alloc_state = OCFS2_LA_ENABLED;
}
- spin_unlock(&osb->osb_lock);
+ spin_unlock(&osb->osb_lock);
+ }
}
void ocfs2_la_enable_worker(struct work_struct *work)
@@ -335,7 +336,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
"found = %u, set = %u, taken = %u, off = %u\n",
num_used, le32_to_cpu(alloc->id1.bitmap1.i_used),
le32_to_cpu(alloc->id1.bitmap1.i_total),
- OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
+ le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off));
status = -EINVAL;
goto bail;
@@ -863,14 +864,8 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
numfound = bitoff = startoff = 0;
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
- while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) {
- if (bitoff == left) {
- /* mlog(0, "bitoff (%d) == left", bitoff); */
- break;
- }
- /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, "
- "numfound = %d\n", bitoff, startoff, numfound);*/
-
+ while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) <
+ left) {
/* Ok, we found a zero bit... is it contig. or do we
* start over?*/
if (bitoff == startoff) {
@@ -976,8 +971,8 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
start = count = 0;
left = le32_to_cpu(alloc->id1.bitmap1.i_total);
- while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start))
- != -1) {
+ while (1) {
+ bit_off = ocfs2_find_next_zero_bit(bitmap, left, start);
if ((bit_off < left) && (bit_off == start)) {
count++;
start++;
@@ -1002,6 +997,7 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb,
goto bail;
}
}
+
if (bit_off >= left)
break;
count = 1;
@@ -1220,7 +1216,7 @@ retry_enospc:
OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
trace_ocfs2_local_alloc_new_window_result(
- OCFS2_LOCAL_ALLOC(alloc)->la_bm_off,
+ le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off),
le32_to_cpu(alloc->id1.bitmap1.i_total));
bail:
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 1834f26522ed..6a314e9f2b49 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -44,16 +44,16 @@ static vm_fault_t ocfs2_fault(struct vm_fault *vmf)
}
static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
- struct buffer_head *di_bh, struct page *page)
+ struct buffer_head *di_bh, struct folio *folio)
{
int err;
vm_fault_t ret = VM_FAULT_NOPAGE;
struct inode *inode = file_inode(file);
struct address_space *mapping = inode->i_mapping;
- loff_t pos = page_offset(page);
+ loff_t pos = folio_pos(folio);
unsigned int len = PAGE_SIZE;
pgoff_t last_index;
- struct page *locked_page = NULL;
+ struct folio *locked_folio = NULL;
void *fsdata;
loff_t size = i_size_read(inode);
@@ -72,9 +72,9 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
*
* Let VM retry with these cases.
*/
- if ((page->mapping != inode->i_mapping) ||
- (!PageUptodate(page)) ||
- (page_offset(page) >= size))
+ if ((folio->mapping != inode->i_mapping) ||
+ !folio_test_uptodate(folio) ||
+ (pos >= size))
goto out;
/*
@@ -87,11 +87,11 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
* worry about ocfs2_write_begin() skipping some buffer reads
* because the "write" would invalidate their data.
*/
- if (page->index == last_index)
+ if (folio->index == last_index)
len = ((size - 1) & ~PAGE_MASK) + 1;
err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
- &locked_page, &fsdata, di_bh, page);
+ &locked_folio, &fsdata, di_bh, folio);
if (err) {
if (err != -ENOSPC)
mlog_errno(err);
@@ -99,7 +99,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file,
goto out;
}
- if (!locked_page) {
+ if (!locked_folio) {
ret = VM_FAULT_NOPAGE;
goto out;
}
@@ -112,7 +112,7 @@ out:
static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
{
- struct page *page = vmf->page;
+ struct folio *folio = page_folio(vmf->page);
struct inode *inode = file_inode(vmf->vma->vm_file);
struct buffer_head *di_bh = NULL;
sigset_t oldset;
@@ -141,7 +141,7 @@ static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf)
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
- ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page);
+ ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, folio);
up_write(&OCFS2_I(inode)->ip_alloc_sem);
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 1f9ed117e78b..369c7d27befd 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -492,7 +492,7 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode,
bg = (struct ocfs2_group_desc *)gd_bh->b_data;
/*
- * moving goal is not allowd to start with a group desc blok(#0 blk)
+ * moving goal is not allowed to start with a group desc blok(#0 blk)
* let's compromise to the latter cluster.
*/
if (range->me_goal == le64_to_cpu(bg->bg_blkno))
@@ -658,7 +658,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
/*
* probe the victim cluster group to find a proper
- * region to fit wanted movement, it even will perfrom
+ * region to fit wanted movement, it even will perform
* a best-effort attempt by compromising to a threshold
* around the goal.
*/
@@ -685,7 +685,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
}
ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh,
- goal_bit, len);
+ goal_bit, len, 0, 0);
if (ret) {
ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len,
le16_to_cpu(gd->bg_chain));
@@ -920,7 +920,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
}
/*
- * rememer ip_xattr_sem also needs to be held if necessary
+ * remember ip_xattr_sem also needs to be held if necessary
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
@@ -1022,7 +1022,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp)
context->range = &range;
/*
- * ok, the default theshold for the defragmentation
+ * ok, the default threshold for the defragmentation
* is 1M, since our maximum clustersize was 1M also.
* any thought?
*/
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 9221a33f917b..99278c8f0e24 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -200,8 +200,10 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
mode = mode_strip_sgid(&nop_mnt_idmap, dir, mode);
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
status = dquot_initialize(inode);
- if (status)
+ if (status) {
+ iput(inode);
return ERR_PTR(status);
+ }
return inode;
}
@@ -506,7 +508,6 @@ static int __ocfs2_mknod_locked(struct inode *dir,
struct inode *inode,
dev_t dev,
struct buffer_head **new_fe_bh,
- struct buffer_head *parent_fe_bh,
handle_t *handle,
struct ocfs2_alloc_context *inode_ac,
u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit)
@@ -566,7 +567,7 @@ static int __ocfs2_mknod_locked(struct inode *dir,
fe->i_last_eb_blk = 0;
strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL);
- ktime_get_real_ts64(&ts);
+ ktime_get_coarse_real_ts64(&ts);
fe->i_atime = fe->i_ctime = fe->i_mtime =
cpu_to_le64(ts.tv_sec);
fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
@@ -639,14 +640,14 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
}
return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh,
- parent_fe_bh, handle, inode_ac,
- fe_blkno, suballoc_loc, suballoc_bit);
+ handle, inode_ac, fe_blkno,
+ suballoc_loc, suballoc_bit);
}
-static int ocfs2_mkdir(struct mnt_idmap *idmap,
- struct inode *dir,
- struct dentry *dentry,
- umode_t mode)
+static struct dentry *ocfs2_mkdir(struct mnt_idmap *idmap,
+ struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode)
{
int ret;
@@ -656,7 +657,7 @@ static int ocfs2_mkdir(struct mnt_idmap *idmap,
if (ret)
mlog_errno(ret);
- return ret;
+ return ERR_PTR(ret);
}
static int ocfs2_create(struct mnt_idmap *idmap,
@@ -797,6 +798,7 @@ static int ocfs2_link(struct dentry *old_dentry,
ocfs2_set_links_count(fe, inode->i_nlink);
fe->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode));
fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode));
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, fe_bh);
err = ocfs2_add_entry(handle, dentry, inode,
@@ -993,6 +995,7 @@ static int ocfs2_unlink(struct inode *dir,
drop_nlink(inode);
drop_nlink(inode);
ocfs2_set_links_count(fe, inode->i_nlink);
+ ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, fe_bh);
inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
@@ -2187,8 +2190,10 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode,
* @osb: ocfs2 file system
* @ret_orphan_dir: Orphan dir inode - returned locked!
* @blkno: Actual block number of the inode to be inserted into orphan dir.
+ * @name: Buffer to store the name of the orphan.
* @lookup: dir lookup result, to be passed back into functions like
* ocfs2_orphan_add
+ * @dio: Flag indicating if direct IO is being used or not.
*
* Returns zero on success and the ret_orphan_dir, name and lookup
* fields will be populated.
@@ -2570,7 +2575,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
clear_nlink(inode);
/* do the real work now. */
status = __ocfs2_mknod_locked(dir, inode,
- 0, &new_di_bh, parent_di_bh, handle,
+ 0, &new_di_bh, handle,
inode_ac, di_blkno, suballoc_loc,
suballoc_bit);
if (status < 0) {
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index a503c553bab2..6aaa94c554c1 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -154,7 +154,7 @@ struct ocfs2_lock_stats {
struct ocfs2_lock_res {
void *l_priv;
- struct ocfs2_lock_res_ops *l_ops;
+ const struct ocfs2_lock_res_ops *l_ops;
struct list_head l_blocked_list;
@@ -284,6 +284,45 @@ enum ocfs2_mount_options
#define OCFS2_OSB_ERROR_FS 0x0004
#define OCFS2_DEFAULT_ATIME_QUANTUM 60
+struct ocfs2_triggers {
+ struct jbd2_buffer_trigger_type ot_triggers;
+ int ot_offset;
+ struct super_block *sb;
+};
+
+enum ocfs2_journal_trigger_type {
+ OCFS2_JTR_DI,
+ OCFS2_JTR_EB,
+ OCFS2_JTR_RB,
+ OCFS2_JTR_GD,
+ OCFS2_JTR_DB,
+ OCFS2_JTR_XB,
+ OCFS2_JTR_DQ,
+ OCFS2_JTR_DR,
+ OCFS2_JTR_DL,
+ OCFS2_JTR_NONE /* This must be the last entry */
+};
+
+#define OCFS2_JOURNAL_TRIGGER_COUNT OCFS2_JTR_NONE
+
+void ocfs2_initialize_journal_triggers(struct super_block *sb,
+ struct ocfs2_triggers triggers[]);
+
+enum ocfs2_recovery_state {
+ OCFS2_REC_ENABLED = 0,
+ OCFS2_REC_QUOTA_WANT_DISABLE,
+ /*
+ * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for
+ * ocfs2_recovery_disable_quota() to work.
+ */
+ OCFS2_REC_QUOTA_DISABLED,
+ OCFS2_REC_WANT_DISABLE,
+ /*
+ * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work
+ */
+ OCFS2_REC_DISABLED,
+};
+
struct ocfs2_journal;
struct ocfs2_slot_info;
struct ocfs2_recovery_map;
@@ -346,11 +385,14 @@ struct ocfs2_super
struct ocfs2_recovery_map *recovery_map;
struct ocfs2_replay_map *replay_map;
struct task_struct *recovery_thread_task;
- int disable_recovery;
+ enum ocfs2_recovery_state recovery_state;
wait_queue_head_t checkpoint_event;
struct ocfs2_journal *journal;
unsigned long osb_commit_interval;
+ /* Journal triggers for checksum */
+ struct ocfs2_triggers s_journal_triggers[OCFS2_JOURNAL_TRIGGER_COUNT];
+
struct delayed_work la_enable_wq;
/*
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 7aebdbf5cc0a..e8e94599e907 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -132,7 +132,7 @@
* well as the name of the cluster being joined.
* mount.ocfs2 must pass in a matching stack name.
*
- * If not set, the classic stack will be used. This is compatbile with
+ * If not set, the classic stack will be used. This is compatible with
* all older versions.
*/
#define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080
@@ -143,7 +143,7 @@
/* Support for extended attributes */
#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
-/* Support for indexed directores */
+/* Support for indexed directories */
#define OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS 0x0400
/* Metadata checksum and error correction */
@@ -156,7 +156,7 @@
#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000
/*
- * Incompat bit to indicate useable clusterinfo with stackflags for all
+ * Incompat bit to indicate usable clusterinfo with stackflags for all
* cluster stacks (userspace adnd o2cb). If this bit is set,
* INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set.
*/
@@ -883,7 +883,8 @@ struct ocfs2_group_desc
__le16 bg_free_bits_count; /* Free bits count */
__le16 bg_chain; /* What chain I am in. */
/*10*/ __le32 bg_generation;
- __le32 bg_reserved1;
+ __le16 bg_contig_free_bits; /* max contig free bits length */
+ __le16 bg_reserved1;
__le64 bg_next_group; /* Next group in my list, in
blocks */
/*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in
@@ -1082,7 +1083,7 @@ struct ocfs2_xattr_block {
struct ocfs2_xattr_header xb_header; /* xattr header if this
block contains xattr */
struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this
- block cotains xattr
+ block contains xattr
tree. */
} xb_attrs;
};
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h
index 9680797bc531..2de2f8733283 100644
--- a/fs/ocfs2/ocfs2_ioctl.h
+++ b/fs/ocfs2/ocfs2_ioctl.h
@@ -215,7 +215,7 @@ struct ocfs2_move_extents {
movement less likely
to fail, may make fs
even more fragmented */
-#define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmenation
+#define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmentation
completely gets done.
*/
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index 8ac357ce6a30..9b234c03d693 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -93,7 +93,7 @@ static char *ocfs2_lock_type_strings[] = {
[OCFS2_LOCK_TYPE_DATA] = "Data",
[OCFS2_LOCK_TYPE_SUPER] = "Super",
[OCFS2_LOCK_TYPE_RENAME] = "Rename",
- /* Need to differntiate from [R]ename.. serializing writes is the
+ /* Need to differentiate from [R]ename.. serializing writes is the
* important job it does, anyway. */
[OCFS2_LOCK_TYPE_RW] = "Write/Read",
[OCFS2_LOCK_TYPE_DENTRY] = "Dentry",
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 9898c11bdfa1..54ed1495de9a 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -82,7 +82,7 @@ DECLARE_EVENT_CLASS(ocfs2__string,
__string(name,name)
),
TP_fast_assign(
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%s", __get_str(name))
);
@@ -1289,7 +1289,7 @@ DECLARE_EVENT_CLASS(ocfs2__file_ops,
__entry->dentry = dentry;
__entry->ino = ino;
__entry->d_len = d_len;
- __assign_str(d_name, d_name);
+ __assign_str(d_name);
__entry->para = para;
),
TP_printk("%p %p %p %llu %llu %.*s", __entry->inode, __entry->file,
@@ -1425,7 +1425,7 @@ TRACE_EVENT(ocfs2_setattr,
__entry->dentry = dentry;
__entry->ino = ino;
__entry->d_len = d_len;
- __assign_str(d_name, d_name);
+ __assign_str(d_name);
__entry->ia_valid = ia_valid;
__entry->ia_mode = ia_mode;
__entry->ia_uid = ia_uid;
@@ -1658,34 +1658,34 @@ TRACE_EVENT(ocfs2_remount,
);
TRACE_EVENT(ocfs2_fill_super,
- TP_PROTO(void *sb, void *data, int silent),
- TP_ARGS(sb, data, silent),
+ TP_PROTO(void *sb, void *fc, int silent),
+ TP_ARGS(sb, fc, silent),
TP_STRUCT__entry(
__field(void *, sb)
- __field(void *, data)
+ __field(void *, fc)
__field(int, silent)
),
TP_fast_assign(
__entry->sb = sb;
- __entry->data = data;
+ __entry->fc = fc;
__entry->silent = silent;
),
TP_printk("%p %p %d", __entry->sb,
- __entry->data, __entry->silent)
+ __entry->fc, __entry->silent)
);
TRACE_EVENT(ocfs2_parse_options,
- TP_PROTO(int is_remount, char *options),
- TP_ARGS(is_remount, options),
+ TP_PROTO(int is_remount, const char *option),
+ TP_ARGS(is_remount, option),
TP_STRUCT__entry(
__field(int, is_remount)
- __string(options, options)
+ __string(option, option)
),
TP_fast_assign(
__entry->is_remount = is_remount;
- __assign_str(options, options);
+ __assign_str(option);
),
- TP_printk("%d %s", __entry->is_remount, __get_str(options))
+ TP_printk("%d %s", __entry->is_remount, __get_str(option))
);
DEFINE_OCFS2_POINTER_EVENT(ocfs2_put_super);
@@ -1718,8 +1718,8 @@ TRACE_EVENT(ocfs2_initialize_super,
__field(int, cluster_bits)
),
TP_fast_assign(
- __assign_str(label, label);
- __assign_str(uuid_str, uuid_str);
+ __assign_str(label);
+ __assign_str(uuid_str);
__entry->root_dir = root_dir;
__entry->system_dir = system_dir;
__entry->cluster_bits = cluster_bits;
@@ -1746,7 +1746,7 @@ TRACE_EVENT(ocfs2_init_xattr_set_ctxt,
__field(int, credits)
),
TP_fast_assign(
- __assign_str(name, name);
+ __assign_str(name);
__entry->meta = meta;
__entry->clusters = clusters;
__entry->credits = credits;
@@ -1770,7 +1770,7 @@ DECLARE_EVENT_CLASS(ocfs2__xattr_find,
),
TP_fast_assign(
__entry->ino = ino;
- __assign_str(name, name);
+ __assign_str(name);
__entry->name_index = name_index;
__entry->hash = hash;
__entry->location = location;
@@ -2019,7 +2019,7 @@ TRACE_EVENT(ocfs2_sync_dquot_helper,
__entry->dq_id = dq_id;
__entry->dq_type = dq_type;
__entry->type = type;
- __assign_str(s_id, s_id);
+ __assign_str(s_id);
),
TP_printk("%u %u %lu %s", __entry->dq_id, __entry->dq_type,
__entry->type, __get_str(s_id))
@@ -2060,7 +2060,7 @@ TRACE_EVENT(ocfs2_dx_dir_search,
TP_fast_assign(
__entry->ino = ino;
__entry->namelen = namelen;
- __assign_str(name, name);
+ __assign_str(name);
__entry->major_hash = major_hash;
__entry->minor_hash = minor_hash;
__entry->blkno = blkno;
@@ -2088,7 +2088,7 @@ TRACE_EVENT(ocfs2_find_files_on_disk,
),
TP_fast_assign(
__entry->namelen = namelen;
- __assign_str(name, name);
+ __assign_str(name);
__entry->blkno = blkno;
__entry->dir = dir;
),
@@ -2107,7 +2107,7 @@ TRACE_EVENT(ocfs2_check_dir_for_entry,
TP_fast_assign(
__entry->dir = dir;
__entry->namelen = namelen;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%llu %.*s", __entry->dir,
__entry->namelen, __get_str(name))
@@ -2135,7 +2135,7 @@ TRACE_EVENT(ocfs2_dx_dir_index_root_block,
__entry->major_hash = major_hash;
__entry->minor_hash = minor_hash;
__entry->namelen = namelen;
- __assign_str(name, name);
+ __assign_str(name);
__entry->num_used = num_used;
),
TP_printk("%llu %x %x %.*s %u", __entry->dir,
@@ -2171,7 +2171,7 @@ DECLARE_EVENT_CLASS(ocfs2__dentry_ops,
__entry->dir = dir;
__entry->dentry = dentry;
__entry->name_len = name_len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->dir_blkno = dir_blkno;
__entry->extra = extra;
),
@@ -2217,7 +2217,7 @@ TRACE_EVENT(ocfs2_mknod,
__entry->dir = dir;
__entry->dentry = dentry;
__entry->name_len = name_len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->dir_blkno = dir_blkno;
__entry->dev = dev;
__entry->mode = mode;
@@ -2241,9 +2241,9 @@ TRACE_EVENT(ocfs2_link,
TP_fast_assign(
__entry->ino = ino;
__entry->old_len = old_len;
- __assign_str(old_name, old_name);
+ __assign_str(old_name);
__entry->name_len = name_len;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%llu %.*s %.*s", __entry->ino,
__entry->old_len, __get_str(old_name),
@@ -2279,9 +2279,9 @@ TRACE_EVENT(ocfs2_rename,
__entry->new_dir = new_dir;
__entry->new_dentry = new_dentry;
__entry->old_len = old_len;
- __assign_str(old_name, old_name);
+ __assign_str(old_name);
__entry->new_len = new_len;
- __assign_str(new_name, new_name);
+ __assign_str(new_name);
),
TP_printk("%p %p %p %p %.*s %.*s",
__entry->old_dir, __entry->old_dentry,
@@ -2301,7 +2301,7 @@ TRACE_EVENT(ocfs2_rename_target_exists,
),
TP_fast_assign(
__entry->new_len = new_len;
- __assign_str(new_name, new_name);
+ __assign_str(new_name);
),
TP_printk("%.*s", __entry->new_len, __get_str(new_name))
);
@@ -2344,7 +2344,7 @@ TRACE_EVENT(ocfs2_symlink_begin,
__entry->dentry = dentry;
__entry->symname = symname;
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%p %p %s %.*s", __entry->dir, __entry->dentry,
__entry->symname, __entry->len, __get_str(name))
@@ -2360,7 +2360,7 @@ TRACE_EVENT(ocfs2_blkno_stringify,
),
TP_fast_assign(
__entry->blkno = blkno;
- __assign_str(name, name);
+ __assign_str(name);
__entry->namelen = namelen;
),
TP_printk("%llu %s %d", __entry->blkno, __get_str(name),
@@ -2381,7 +2381,7 @@ TRACE_EVENT(ocfs2_orphan_del,
),
TP_fast_assign(
__entry->dir = dir;
- __assign_str(name, name);
+ __assign_str(name);
__entry->namelen = namelen;
),
TP_printk("%llu %s %d", __entry->dir, __get_str(name),
@@ -2403,7 +2403,7 @@ TRACE_EVENT(ocfs2_dentry_revalidate,
TP_fast_assign(
__entry->dentry = dentry;
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%p %.*s", __entry->dentry, __entry->len, __get_str(name))
);
@@ -2420,7 +2420,7 @@ TRACE_EVENT(ocfs2_dentry_revalidate_negative,
),
TP_fast_assign(
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->pgen = pgen;
__entry->gen = gen;
),
@@ -2445,7 +2445,7 @@ TRACE_EVENT(ocfs2_find_local_alias,
),
TP_fast_assign(
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
),
TP_printk("%.*s", __entry->len, __get_str(name))
);
@@ -2462,7 +2462,7 @@ TRACE_EVENT(ocfs2_dentry_attach_lock,
),
TP_fast_assign(
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->parent = parent;
__entry->fsdata = fsdata;
),
@@ -2480,7 +2480,7 @@ TRACE_EVENT(ocfs2_dentry_attach_lock_found,
__field(unsigned long long, ino)
),
TP_fast_assign(
- __assign_str(name, name);
+ __assign_str(name);
__entry->parent = parent;
__entry->ino = ino;
),
@@ -2527,7 +2527,7 @@ TRACE_EVENT(ocfs2_get_parent,
TP_fast_assign(
__entry->child = child;
__entry->len = len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->ino = ino;
),
TP_printk("%p %.*s %llu", __entry->child, __entry->len,
@@ -2551,7 +2551,7 @@ TRACE_EVENT(ocfs2_encode_fh_begin,
TP_fast_assign(
__entry->dentry = dentry;
__entry->name_len = name_len;
- __assign_str(name, name);
+ __assign_str(name);
__entry->fh = fh;
__entry->len = len;
__entry->connectable = connectable;
@@ -2577,6 +2577,8 @@ DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_commit_cache_end);
DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans);
+DEFINE_OCFS2_INT_EVENT(ocfs2_assure_trans_credits);
+
DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart);
DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans);
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index ebb5c99f490e..788a8de922a4 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -97,7 +97,6 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
int ocfs2_global_read_info(struct super_block *sb, int type);
int ocfs2_global_write_info(struct super_block *sb, int type);
-int ocfs2_global_read_dquot(struct dquot *dquot);
int __ocfs2_sync_dquot(struct dquot *dquot, int freeing);
static inline int ocfs2_sync_dquot(struct dquot *dquot)
{
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 0575c2d060eb..e85b1ccf81be 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -273,7 +273,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
if (new)
memset(bh->b_data, 0, sb->s_blocksize);
memcpy(bh->b_data + offset, data, len);
- flush_dcache_page(bh->b_page);
+ flush_dcache_folio(bh->b_folio);
set_buffer_uptodate(bh);
unlock_buffer(bh);
ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh);
@@ -371,12 +371,16 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
status = ocfs2_extent_map_get_blocks(oinfo->dqi_gqinode, 0, &oinfo->dqi_giblk,
&pcount, NULL);
- if (status < 0)
+ if (status < 0) {
+ mlog_errno(status);
goto out_unlock;
+ }
status = ocfs2_qinfo_lock(oinfo, 0);
- if (status < 0)
+ if (status < 0) {
+ mlog_errno(status);
goto out_unlock;
+ }
status = sb->s_op->quota_read(sb, type, (char *)&dinfo,
sizeof(struct ocfs2_global_disk_dqinfo),
OCFS2_GLOBAL_INFO_OFF);
@@ -404,12 +408,11 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
schedule_delayed_work(&oinfo->dqi_sync_work,
msecs_to_jiffies(oinfo->dqi_syncms));
-out_err:
- return status;
+ return 0;
out_unlock:
ocfs2_unlock_global_qf(oinfo, 0);
- mlog_errno(status);
- goto out_err;
+out_err:
+ return status;
}
/* Write information to global quota file. Expects exclusive lock on quota
@@ -758,6 +761,11 @@ static int ocfs2_release_dquot(struct dquot *dquot)
handle = ocfs2_start_trans(osb,
ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_id.type));
if (IS_ERR(handle)) {
+ /*
+ * Mark dquot as inactive to avoid endless cycle in
+ * quota_release_workfn().
+ */
+ clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
status = PTR_ERR(handle);
mlog_errno(status);
goto out_ilock;
@@ -890,7 +898,7 @@ static int ocfs2_get_next_id(struct super_block *sb, struct kqid *qid)
int status = 0;
trace_ocfs2_get_next_id(from_kqid(&init_user_ns, *qid), type);
- if (!sb_has_quota_loaded(sb, type)) {
+ if (!sb_has_quota_active(sb, type)) {
status = -ESRCH;
goto out;
}
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 8ce462c64c51..de7f12858729 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -453,8 +453,7 @@ out:
/* Sync changes in local quota file into global quota file and
* reinitialize local quota file.
- * The function expects local quota file to be already locked and
- * s_umount locked in shared mode. */
+ * The function expects local quota file to be already locked. */
static int ocfs2_recover_local_quota_file(struct inode *lqinode,
int type,
struct ocfs2_quota_recovery *rec)
@@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
{
unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE,
LOCAL_GROUP_QUOTA_SYSTEM_INODE };
- struct super_block *sb = osb->sb;
struct ocfs2_local_disk_dqinfo *ldinfo;
struct buffer_head *bh;
handle_t *handle;
@@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for "
"slot %u\n", osb->dev_str, slot_num);
- down_read(&sb->s_umount);
for (type = 0; type < OCFS2_MAXQUOTAS; type++) {
if (list_empty(&(rec->r_list[type])))
continue;
@@ -677,8 +674,7 @@ out_put:
break;
}
out:
- up_read(&sb->s_umount);
- kfree(rec);
+ ocfs2_free_quota_recovery(rec);
return status;
}
@@ -692,7 +688,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
int status;
struct buffer_head *bh = NULL;
struct ocfs2_quota_recovery *rec;
- int locked = 0;
+ int locked = 0, global_read = 0;
info->dqi_max_spc_limit = 0x7fffffffffffffffLL;
info->dqi_max_ino_limit = 0x7fffffffffffffffLL;
@@ -700,6 +696,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
if (!oinfo) {
mlog(ML_ERROR, "failed to allocate memory for ocfs2 quota"
" info.");
+ status = -ENOMEM;
goto out_err;
}
info->dqi_priv = oinfo;
@@ -712,6 +709,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
status = ocfs2_global_read_info(sb, type);
if (status < 0)
goto out_err;
+ global_read = 1;
status = ocfs2_inode_lock(lqinode, &oinfo->dqi_lqi_bh, 1);
if (status < 0) {
@@ -782,10 +780,12 @@ out_err:
if (locked)
ocfs2_inode_unlock(lqinode, 1);
ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
+ if (global_read)
+ cancel_delayed_work_sync(&oinfo->dqi_sync_work);
kfree(oinfo);
}
brelse(bh);
- return -1;
+ return status;
}
/* Write local info to quota file */
@@ -839,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type)
ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk);
/*
- * s_umount held in exclusive mode protects us against racing with
- * recovery thread...
+ * ocfs2_dismount_volume() has already aborted quota recovery...
*/
if (oinfo->dqi_rec) {
ocfs2_free_quota_recovery(oinfo->dqi_rec);
@@ -863,6 +862,7 @@ out:
brelse(oinfo->dqi_libh);
brelse(oinfo->dqi_lqi_bh);
kfree(oinfo);
+ info->dqi_priv = NULL;
return status;
}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3f80a56d0d60..8f732742b26e 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -25,6 +25,7 @@
#include "namei.h"
#include "ocfs2_trace.h"
#include "file.h"
+#include "symlink.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
@@ -630,7 +631,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
rb->rf_records.rl_count =
cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
spin_lock(&osb->osb_lock);
- rb->rf_generation = osb->s_next_generation++;
+ rb->rf_generation = cpu_to_le32(osb->s_next_generation++);
spin_unlock(&osb->osb_lock);
ocfs2_journal_dirty(handle, new_bh);
@@ -1392,13 +1393,6 @@ static int cmp_refcount_rec_by_cpos(const void *a, const void *b)
return 0;
}
-static void swap_refcount_rec(void *a, void *b, int size)
-{
- struct ocfs2_refcount_rec *l = a, *r = b;
-
- swap(*l, *r);
-}
-
/*
* The refcount cpos are ordered by their 64bit cpos,
* But we will use the low 32 bit to be the e_cpos in the b-tree.
@@ -1474,7 +1468,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
*/
sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
sizeof(struct ocfs2_refcount_rec),
- cmp_refcount_rec_by_low_cpos, swap_refcount_rec);
+ cmp_refcount_rec_by_low_cpos, NULL);
ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index);
if (ret) {
@@ -1499,11 +1493,11 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
sizeof(struct ocfs2_refcount_rec),
- cmp_refcount_rec_by_cpos, swap_refcount_rec);
+ cmp_refcount_rec_by_cpos, NULL);
sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used),
sizeof(struct ocfs2_refcount_rec),
- cmp_refcount_rec_by_cpos, swap_refcount_rec);
+ cmp_refcount_rec_by_cpos, NULL);
*split_cpos = cpos;
return 0;
@@ -2426,7 +2420,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
*
* If we will insert a new one, this is easy and only happens
* during adding refcounted flag to the extent, so we don't
- * have a chance of spliting. We just need one record.
+ * have a chance of splitting. We just need one record.
*
* If the refcount rec already exists, that would be a little
* complicated. we may have to:
@@ -2616,11 +2610,11 @@ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
/*
* Calculate out the start and number of virtual clusters we need to CoW.
*
- * cpos is vitual start cluster position we want to do CoW in a
+ * cpos is virtual start cluster position we want to do CoW in a
* file and write_len is the cluster length.
* max_cpos is the place where we want to stop CoW intentionally.
*
- * Normal we will start CoW from the beginning of extent record cotaining cpos.
+ * Normal we will start CoW from the beginning of extent record containing cpos.
* We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we
* get good I/O from the resulting extent tree.
*/
@@ -2908,7 +2902,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
int ret = 0, partial;
struct super_block *sb = inode->i_sb;
u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
- struct page *page;
pgoff_t page_index;
unsigned int from, to;
loff_t offset, end, map_end;
@@ -2927,6 +2920,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
end = i_size_read(inode);
while (offset < end) {
+ struct folio *folio;
page_index = offset >> PAGE_SHIFT;
map_end = ((loff_t)page_index + 1) << PAGE_SHIFT;
if (map_end > end)
@@ -2939,9 +2933,10 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
to = map_end & (PAGE_SIZE - 1);
retry:
- page = find_or_create_page(mapping, page_index, GFP_NOFS);
- if (!page) {
- ret = -ENOMEM;
+ folio = __filemap_get_folio(mapping, page_index,
+ FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS);
+ if (IS_ERR(folio)) {
+ ret = PTR_ERR(folio);
mlog_errno(ret);
break;
}
@@ -2951,9 +2946,9 @@ retry:
* page, so write it back.
*/
if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) {
- if (PageDirty(page)) {
- unlock_page(page);
- put_page(page);
+ if (folio_test_dirty(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
ret = filemap_write_and_wait_range(mapping,
offset, map_end - 1);
@@ -2961,9 +2956,7 @@ retry:
}
}
- if (!PageUptodate(page)) {
- struct folio *folio = page_folio(page);
-
+ if (!folio_test_uptodate(folio)) {
ret = block_read_full_folio(folio, ocfs2_get_block);
if (ret) {
mlog_errno(ret);
@@ -2972,8 +2965,8 @@ retry:
folio_lock(folio);
}
- if (page_has_buffers(page)) {
- ret = walk_page_buffers(handle, page_buffers(page),
+ if (folio_buffers(folio)) {
+ ret = walk_page_buffers(handle, folio_buffers(folio),
from, to, &partial,
ocfs2_clear_cow_buffer);
if (ret) {
@@ -2982,14 +2975,12 @@ retry:
}
}
- ocfs2_map_and_dirty_page(inode,
- handle, from, to,
- page, 0, &new_block);
- mark_page_accessed(page);
+ ocfs2_map_and_dirty_folio(inode, handle, from, to,
+ folio, 0, &new_block);
+ folio_mark_accessed(folio);
unlock:
- unlock_page(page);
- put_page(page);
- page = NULL;
+ folio_unlock(folio);
+ folio_put(folio);
offset = map_end;
if (ret)
break;
@@ -4155,8 +4146,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
int ret;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *new_bh = NULL;
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
- if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
+ if (oi->ip_flags & OCFS2_INODE_SYSTEM_FILE) {
ret = -EINVAL;
mlog_errno(ret);
goto out;
@@ -4182,6 +4174,26 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto out_unlock;
}
+ if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) &&
+ (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ struct ocfs2_inode_info *new_oi = OCFS2_I(new_inode);
+
+ if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(new_inode))) {
+ struct ocfs2_dinode *new_di = (struct ocfs2_dinode *)new_bh->b_data;
+ struct ocfs2_dinode *old_di = (struct ocfs2_dinode *)old_bh->b_data;
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ int inline_size = le16_to_cpu(old_di->i_xattr_inline_size);
+
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
+ }
+
ret = ocfs2_create_reflink_node(inode, old_bh,
new_inode, new_bh, preserve);
if (ret) {
@@ -4189,7 +4201,7 @@ static int __ocfs2_reflink(struct dentry *old_dentry,
goto inode_unlock;
}
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
+ if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
ret = ocfs2_reflink_xattrs(inode, old_bh,
new_inode, new_bh,
preserve);
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
index a9d1296d736d..1fe61974d9f0 100644
--- a/fs/ocfs2/reservations.c
+++ b/fs/ocfs2/reservations.c
@@ -414,7 +414,7 @@ static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
start = search_start;
while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len,
- start)) != -1) {
+ start)) < resmap->m_bitmap_len) {
/* Search reached end of the region */
if (offset >= (search_start + search_len))
break;
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
index ec8101ef5717..4fce17180342 100644
--- a/fs/ocfs2/reservations.h
+++ b/fs/ocfs2/reservations.h
@@ -31,7 +31,7 @@ struct ocfs2_alloc_reservation {
#define OCFS2_RESV_FLAG_INUSE 0x01 /* Set when r_node is part of a btree */
#define OCFS2_RESV_FLAG_TMP 0x02 /* Temporary reservation, will be
- * destroyed immedately after use */
+ * destroyed immediately after use */
#define OCFS2_RESV_FLAG_DIR 0x04 /* Reservation is for an unindexed
* directory btree */
@@ -125,7 +125,7 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
/**
* ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
* @resmap: reservations bitmap
- * @resv: optional reservation to recalulate based on new bitmap
+ * @resv: optional reservation to recalculate based on new bitmap
* @cstart: start of allocation in clusters
* @clen: end of allocation in clusters.
*
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index d65d43c61857..b0733c08ed13 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -91,6 +91,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
u16 cl_bpc = le16_to_cpu(cl->cl_bpc);
u16 cl_cpg = le16_to_cpu(cl->cl_cpg);
u16 old_bg_clusters;
+ u16 contig_bits;
+ __le16 old_bg_contig_free_bits;
trace_ocfs2_update_last_group_and_inode(new_clusters,
first_new_cluster);
@@ -122,6 +124,11 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
}
+ contig_bits = ocfs2_find_max_contig_free_bits(group->bg_bitmap,
+ le16_to_cpu(group->bg_bits), 0);
+ old_bg_contig_free_bits = group->bg_contig_free_bits;
+ group->bg_contig_free_bits = cpu_to_le16(contig_bits);
+
ocfs2_journal_dirty(handle, group_bh);
/* update the inode accordingly. */
@@ -160,6 +167,7 @@ out_rollback:
le16_add_cpu(&group->bg_free_bits_count, backups);
le16_add_cpu(&group->bg_bits, -1 * num_bits);
le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits);
+ group->bg_contig_free_bits = old_bg_contig_free_bits;
}
out:
if (ret)
@@ -566,6 +574,8 @@ out_commit:
ocfs2_commit_trans(osb, handle);
out_free_group_bh:
+ if (ret < 0)
+ ocfs2_remove_from_cache(INODE_CACHE(inode), group_bh);
brelse(group_bh);
out_unlock:
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index c973c03f6fd8..f58e891aa2da 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -227,7 +227,7 @@ static int o2cb_dlm_lock_status(struct ocfs2_dlm_lksb *lksb)
}
/*
- * o2dlm aways has a "valid" LVB. If the dlm loses track of the LVB
+ * o2dlm always has a "valid" LVB. If the dlm loses track of the LVB
* contents, it will zero out the LVB. Thus the caller can always trust
* the contents.
*/
@@ -404,7 +404,7 @@ static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn,
return 0;
}
-static struct ocfs2_stack_operations o2cb_stack_ops = {
+static const struct ocfs2_stack_operations o2cb_stack_ops = {
.connect = o2cb_cluster_connect,
.disconnect = o2cb_cluster_disconnect,
.this_node = o2cb_cluster_this_node,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index c11406cd87a8..77edcd70f72c 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -1065,7 +1065,7 @@ static int user_cluster_this_node(struct ocfs2_cluster_connection *conn,
return 0;
}
-static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
+static const struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
.connect = user_cluster_connect,
.disconnect = user_cluster_disconnect,
.this_node = user_cluster_this_node,
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 20aa37b67cfb..a28c127b9934 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -650,7 +650,7 @@ error:
* and easier to preserve the name.
*/
-static struct ctl_table ocfs2_nm_table[] = {
+static const struct ctl_table ocfs2_nm_table[] = {
{
.procname = "hb_ctl_path",
.data = ocfs2_hb_ctl_path,
@@ -691,8 +691,7 @@ static void __exit ocfs2_stack_glue_exit(void)
memset(&locking_max_version, 0,
sizeof(struct ocfs2_protocol_version));
ocfs2_sysfs_exit();
- if (ocfs2_table_header)
- unregister_sysctl_table(ocfs2_table_header);
+ unregister_sysctl_table(ocfs2_table_header);
}
MODULE_AUTHOR("Oracle");
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 3636847fae19..5486a6dce70a 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -210,7 +210,7 @@ struct ocfs2_stack_operations {
struct file_lock *fl);
/*
- * This is an optoinal debugging hook. If provided, the
+ * This is an optional debugging hook. If provided, the
* stack can dump debugging information about this lock.
*/
void (*dump_lksb)(struct ocfs2_dlm_lksb *lksb);
@@ -223,7 +223,7 @@ struct ocfs2_stack_operations {
*/
struct ocfs2_stack_plugin {
char *sp_name;
- struct ocfs2_stack_operations *sp_ops;
+ const struct ocfs2_stack_operations *sp_ops;
struct module *sp_owner;
/* These are managed by the stackglue code. */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 166c8918c825..6ac4dcd54588 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -50,6 +50,10 @@ struct ocfs2_suballoc_result {
u64 sr_blkno; /* The first allocated block */
unsigned int sr_bit_offset; /* The bit in the bg */
unsigned int sr_bits; /* How many bits we claimed */
+ unsigned int sr_max_contig_bits; /* The length for contiguous
+ * free bits, only available
+ * for cluster group
+ */
};
static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res)
@@ -694,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
ac, cl);
- if (PTR_ERR(bg_bh) == -ENOSPC)
+ if (PTR_ERR(bg_bh) == -ENOSPC) {
+ ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
bg_bh = ocfs2_block_group_alloc_discontig(handle,
alloc_inode,
ac, cl);
+ }
if (IS_ERR(bg_bh)) {
status = PTR_ERR(bg_bh);
bg_bh = NULL;
@@ -1272,6 +1278,26 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
return ret;
}
+u16 ocfs2_find_max_contig_free_bits(void *bitmap,
+ u16 total_bits, u16 start)
+{
+ u16 offset, free_bits;
+ u16 contig_bits = 0;
+
+ while (start < total_bits) {
+ offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start);
+ if (offset == total_bits)
+ break;
+
+ start = ocfs2_find_next_bit(bitmap, total_bits, offset);
+ free_bits = start - offset;
+ if (contig_bits < free_bits)
+ contig_bits = free_bits;
+ }
+
+ return contig_bits;
+}
+
static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
struct buffer_head *bg_bh,
unsigned int bits_wanted,
@@ -1280,6 +1306,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
{
void *bitmap;
u16 best_offset, best_size;
+ u16 prev_best_size = 0;
int offset, start, found, status = 0;
struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
@@ -1290,10 +1317,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
found = start = best_offset = best_size = 0;
bitmap = bg->bg_bitmap;
- while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
- if (offset == total_bits)
- break;
-
+ while ((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) <
+ total_bits) {
if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
/* We found a zero, but we can't use it as it
* hasn't been put to disk yet! */
@@ -1308,6 +1333,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
/* got a zero after some ones */
found = 1;
start = offset + 1;
+ prev_best_size = best_size;
}
if (found > best_size) {
best_size = found;
@@ -1320,6 +1346,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
}
}
+ /* best_size will be allocated, we save prev_best_size */
+ res->sr_max_contig_bits = prev_best_size;
if (best_size) {
res->sr_bit_offset = best_offset;
res->sr_bits = best_size;
@@ -1337,11 +1365,16 @@ int ocfs2_block_group_set_bits(handle_t *handle,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
unsigned int bit_off,
- unsigned int num_bits)
+ unsigned int num_bits,
+ unsigned int max_contig_bits,
+ int fastpath)
{
int status;
void *bitmap = bg->bg_bitmap;
int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
+ unsigned int start = bit_off + num_bits;
+ u16 contig_bits;
+ struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
/* All callers get the descriptor via
* ocfs2_read_group_descriptor(). Any corruption is a code bug. */
@@ -1373,6 +1406,29 @@ int ocfs2_block_group_set_bits(handle_t *handle,
while(num_bits--)
ocfs2_set_bit(bit_off++, bitmap);
+ /*
+ * this is optimize path, caller set old contig value
+ * in max_contig_bits to bypass finding action.
+ */
+ if (fastpath) {
+ bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
+ } else if (ocfs2_is_cluster_bitmap(alloc_inode)) {
+ /*
+ * Usually, the block group bitmap allocates only 1 bit
+ * at a time, while the cluster group allocates n bits
+ * each time. Therefore, we only save the contig bits for
+ * the cluster group.
+ */
+ contig_bits = ocfs2_find_max_contig_free_bits(bitmap,
+ le16_to_cpu(bg->bg_bits), start);
+ if (contig_bits > max_contig_bits)
+ max_contig_bits = contig_bits;
+ bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
+ ocfs2_local_alloc_seen_free_bits(osb, max_contig_bits);
+ } else {
+ bg->bg_contig_free_bits = 0;
+ }
+
ocfs2_journal_dirty(handle, group_bh);
bail:
@@ -1486,7 +1542,12 @@ static int ocfs2_cluster_group_search(struct inode *inode,
BUG_ON(!ocfs2_is_cluster_bitmap(inode));
- if (gd->bg_free_bits_count) {
+ if (le16_to_cpu(gd->bg_contig_free_bits) &&
+ le16_to_cpu(gd->bg_contig_free_bits) < bits_wanted)
+ return -ENOSPC;
+
+ /* ->bg_contig_free_bits may un-initialized, so compare again */
+ if (le16_to_cpu(gd->bg_free_bits_count) >= bits_wanted) {
max_bits = le16_to_cpu(gd->bg_bits);
/* Tail groups in cluster bitmaps which aren't cpg
@@ -1530,13 +1591,6 @@ static int ocfs2_cluster_group_search(struct inode *inode,
* of bits. */
if (min_bits <= res->sr_bits)
search = 0; /* success */
- else if (res->sr_bits) {
- /*
- * Don't show bits which we'll be returning
- * for allocation to the local alloc bitmap.
- */
- ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
- }
}
return search;
@@ -1555,7 +1609,7 @@ static int ocfs2_block_group_search(struct inode *inode,
BUG_ON(min_bits != 1);
BUG_ON(ocfs2_is_cluster_bitmap(inode));
- if (bg->bg_free_bits_count) {
+ if (le16_to_cpu(bg->bg_free_bits_count) >= bits_wanted) {
ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
group_bh, bits_wanted,
le16_to_cpu(bg->bg_bits),
@@ -1715,7 +1769,8 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
}
ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
- res->sr_bit_offset, res->sr_bits);
+ res->sr_bit_offset, res->sr_bits,
+ res->sr_max_contig_bits, 0);
if (ret < 0) {
ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh,
res->sr_bits,
@@ -1741,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
{
int status;
u16 chain;
+ u32 contig_bits;
u64 next_group;
struct inode *alloc_inode = ac->ac_inode;
struct buffer_head *group_bh = NULL;
@@ -1766,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
status = -ENOSPC;
/* for now, the chain search is a bit simplistic. We just use
* the 1st group with any empty bits. */
- while ((status = ac->ac_group_search(alloc_inode, group_bh,
- bits_wanted, min_bits,
- ac->ac_max_block,
- res)) == -ENOSPC) {
+ while (1) {
+ if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) {
+ contig_bits = le16_to_cpu(bg->bg_contig_free_bits);
+ if (!contig_bits)
+ contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap,
+ le16_to_cpu(bg->bg_bits), 0);
+ if (bits_wanted > contig_bits && contig_bits >= min_bits)
+ bits_wanted = contig_bits;
+ }
+
+ status = ac->ac_group_search(alloc_inode, group_bh,
+ bits_wanted, min_bits,
+ ac->ac_max_block, res);
+ if (status != -ENOSPC)
+ break;
if (!bg->bg_next_group)
break;
@@ -1849,7 +1916,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
bg,
group_bh,
res->sr_bit_offset,
- res->sr_bits);
+ res->sr_bits,
+ res->sr_max_contig_bits,
+ 0);
if (status < 0) {
ocfs2_rollback_alloc_dinode_counts(alloc_inode,
ac->ac_bh, res->sr_bits, chain);
@@ -1927,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
victim = ocfs2_find_victim_chain(cl);
ac->ac_chain = victim;
+search:
status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
res, &bits_left);
if (!status) {
@@ -1951,7 +2021,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
if (i == victim)
continue;
- if (!cl->cl_recs[i].c_free)
+ if (le32_to_cpu(cl->cl_recs[i].c_free) < bits_wanted)
continue;
ac->ac_chain = i;
@@ -1967,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
}
}
+ /* Chains can't supply the bits_wanted contiguous space.
+ * We should switch to using every single bit when allocating
+ * from the global bitmap. */
+ if (i == le16_to_cpu(cl->cl_next_free_rec) &&
+ status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) {
+ ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG;
+ ac->ac_chain = victim;
+ goto search;
+ }
+
set_hint:
if (status != -ENOSPC) {
/* If the next search of this group is not likely to
@@ -2163,7 +2243,9 @@ int ocfs2_claim_new_inode_at_loc(handle_t *handle,
bg,
bg_bh,
res->sr_bit_offset,
- res->sr_bits);
+ res->sr_bits,
+ res->sr_max_contig_bits,
+ 0);
if (ret < 0) {
ocfs2_rollback_alloc_dinode_counts(ac->ac_inode,
ac->ac_bh, res->sr_bits, chain);
@@ -2308,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle,
BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
- && ac->ac_which != OCFS2_AC_USE_MAIN);
+ && ac->ac_which != OCFS2_AC_USE_MAIN
+ && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG);
if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
WARN_ON(min_clusters > 1);
@@ -2382,11 +2465,13 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
struct buffer_head *group_bh,
unsigned int bit_off,
unsigned int num_bits,
+ unsigned int max_contig_bits,
void (*undo_fn)(unsigned int bit,
unsigned long *bmap))
{
int status;
unsigned int tmp;
+ u16 contig_bits;
struct ocfs2_group_desc *undo_bg = NULL;
struct journal_head *jh;
@@ -2433,6 +2518,20 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
num_bits);
}
+ /*
+ * TODO: even 'num_bits == 1' (the worst case, release 1 cluster),
+ * we still need to rescan whole bitmap.
+ */
+ if (ocfs2_is_cluster_bitmap(alloc_inode)) {
+ contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap,
+ le16_to_cpu(bg->bg_bits), 0);
+ if (contig_bits > max_contig_bits)
+ max_contig_bits = contig_bits;
+ bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits);
+ } else {
+ bg->bg_contig_free_bits = 0;
+ }
+
if (undo_fn)
spin_unlock(&jh->b_state_lock);
@@ -2459,6 +2558,7 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
struct ocfs2_chain_list *cl = &fe->id2.i_chain;
struct buffer_head *group_bh = NULL;
struct ocfs2_group_desc *group;
+ __le16 old_bg_contig_free_bits = 0;
/* The alloc_bh comes from ocfs2_free_dinode() or
* ocfs2_free_clusters(). The callers have all locked the
@@ -2483,9 +2583,11 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
+ if (ocfs2_is_cluster_bitmap(alloc_inode))
+ old_bg_contig_free_bits = group->bg_contig_free_bits;
status = ocfs2_block_group_clear_bits(handle, alloc_inode,
group, group_bh,
- start_bit, count, undo_fn);
+ start_bit, count, 0, undo_fn);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -2496,7 +2598,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
if (status < 0) {
mlog_errno(status);
ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh,
- start_bit, count);
+ start_bit, count,
+ le16_to_cpu(old_bg_contig_free_bits), 1);
goto bail;
}
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 9c74eace3adc..bcf2ed4a8631 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -29,6 +29,7 @@ struct ocfs2_alloc_context {
#define OCFS2_AC_USE_MAIN 2
#define OCFS2_AC_USE_INODE 3
#define OCFS2_AC_USE_META 4
+#define OCFS2_AC_USE_MAIN_DISCONTIG 5
u32 ac_which;
/* these are used by the chain search */
@@ -79,12 +80,16 @@ void ocfs2_rollback_alloc_dinode_counts(struct inode *inode,
struct buffer_head *di_bh,
u32 num_bits,
u16 chain);
+u16 ocfs2_find_max_contig_free_bits(void *bitmap,
+ u16 total_bits, u16 start);
int ocfs2_block_group_set_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
unsigned int bit_off,
- unsigned int num_bits);
+ unsigned int num_bits,
+ unsigned int max_contig_bits,
+ int fastpath);
int ocfs2_claim_metadata(handle_t *handle,
struct ocfs2_alloc_context *ac,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8aabaed2c1cb..3d2533950bae 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -19,10 +19,10 @@
#include <linux/blkdev.h>
#include <linux/socket.h>
#include <linux/inet.h>
-#include <linux/parser.h>
+#include <linux/fs_parser.h>
+#include <linux/fs_context.h>
#include <linux/crc32.h>
#include <linux/debugfs.h>
-#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/quotaops.h>
#include <linux/signal.h>
@@ -80,17 +80,15 @@ struct mount_options
unsigned int resv_level;
int dir_resv_level;
char cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
+ bool user_stack;
};
-static int ocfs2_parse_options(struct super_block *sb, char *options,
- struct mount_options *mopt,
- int is_remount);
+static int ocfs2_parse_param(struct fs_context *fc, struct fs_parameter *param);
static int ocfs2_check_set_options(struct super_block *sb,
struct mount_options *options);
static int ocfs2_show_options(struct seq_file *s, struct dentry *root);
static void ocfs2_put_super(struct super_block *sb);
static int ocfs2_mount_volume(struct super_block *sb);
-static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err);
static int ocfs2_initialize_mem_caches(void);
static void ocfs2_free_mem_caches(void);
@@ -135,7 +133,6 @@ static const struct super_operations ocfs2_sops = {
.evict_inode = ocfs2_evict_inode,
.sync_fs = ocfs2_sync_fs,
.put_super = ocfs2_put_super,
- .remount_fs = ocfs2_remount,
.show_options = ocfs2_show_options,
.quota_read = ocfs2_quota_read,
.quota_write = ocfs2_quota_write,
@@ -144,15 +141,10 @@ static const struct super_operations ocfs2_sops = {
enum {
Opt_barrier,
- Opt_err_panic,
- Opt_err_ro,
+ Opt_errors,
Opt_intr,
- Opt_nointr,
- Opt_hb_none,
- Opt_hb_local,
- Opt_hb_global,
- Opt_data_ordered,
- Opt_data_writeback,
+ Opt_heartbeat,
+ Opt_data,
Opt_atime_quantum,
Opt_slot,
Opt_commit,
@@ -160,52 +152,64 @@ enum {
Opt_localflocks,
Opt_stack,
Opt_user_xattr,
- Opt_nouser_xattr,
Opt_inode64,
Opt_acl,
- Opt_noacl,
Opt_usrquota,
Opt_grpquota,
- Opt_coherency_buffered,
- Opt_coherency_full,
+ Opt_coherency,
Opt_resv_level,
Opt_dir_resv_level,
Opt_journal_async_commit,
- Opt_err_cont,
- Opt_err,
};
-static const match_table_t tokens = {
- {Opt_barrier, "barrier=%u"},
- {Opt_err_panic, "errors=panic"},
- {Opt_err_ro, "errors=remount-ro"},
- {Opt_intr, "intr"},
- {Opt_nointr, "nointr"},
- {Opt_hb_none, OCFS2_HB_NONE},
- {Opt_hb_local, OCFS2_HB_LOCAL},
- {Opt_hb_global, OCFS2_HB_GLOBAL},
- {Opt_data_ordered, "data=ordered"},
- {Opt_data_writeback, "data=writeback"},
- {Opt_atime_quantum, "atime_quantum=%u"},
- {Opt_slot, "preferred_slot=%u"},
- {Opt_commit, "commit=%u"},
- {Opt_localalloc, "localalloc=%d"},
- {Opt_localflocks, "localflocks"},
- {Opt_stack, "cluster_stack=%s"},
- {Opt_user_xattr, "user_xattr"},
- {Opt_nouser_xattr, "nouser_xattr"},
- {Opt_inode64, "inode64"},
- {Opt_acl, "acl"},
- {Opt_noacl, "noacl"},
- {Opt_usrquota, "usrquota"},
- {Opt_grpquota, "grpquota"},
- {Opt_coherency_buffered, "coherency=buffered"},
- {Opt_coherency_full, "coherency=full"},
- {Opt_resv_level, "resv_level=%u"},
- {Opt_dir_resv_level, "dir_resv_level=%u"},
- {Opt_journal_async_commit, "journal_async_commit"},
- {Opt_err_cont, "errors=continue"},
- {Opt_err, NULL}
+static const struct constant_table ocfs2_param_errors[] = {
+ {"panic", OCFS2_MOUNT_ERRORS_PANIC},
+ {"remount-ro", OCFS2_MOUNT_ERRORS_ROFS},
+ {"continue", OCFS2_MOUNT_ERRORS_CONT},
+ {}
+};
+
+static const struct constant_table ocfs2_param_heartbeat[] = {
+ {"local", OCFS2_MOUNT_HB_LOCAL},
+ {"none", OCFS2_MOUNT_HB_NONE},
+ {"global", OCFS2_MOUNT_HB_GLOBAL},
+ {}
+};
+
+static const struct constant_table ocfs2_param_data[] = {
+ {"writeback", OCFS2_MOUNT_DATA_WRITEBACK},
+ {"ordered", 0},
+ {}
+};
+
+static const struct constant_table ocfs2_param_coherency[] = {
+ {"buffered", OCFS2_MOUNT_COHERENCY_BUFFERED},
+ {"full", 0},
+ {}
+};
+
+static const struct fs_parameter_spec ocfs2_param_spec[] = {
+ fsparam_u32 ("barrier", Opt_barrier),
+ fsparam_enum ("errors", Opt_errors, ocfs2_param_errors),
+ fsparam_flag_no ("intr", Opt_intr),
+ fsparam_enum ("heartbeat", Opt_heartbeat, ocfs2_param_heartbeat),
+ fsparam_enum ("data", Opt_data, ocfs2_param_data),
+ fsparam_u32 ("atime_quantum", Opt_atime_quantum),
+ fsparam_u32 ("preferred_slot", Opt_slot),
+ fsparam_u32 ("commit", Opt_commit),
+ fsparam_s32 ("localalloc", Opt_localalloc),
+ fsparam_flag ("localflocks", Opt_localflocks),
+ fsparam_string ("cluster_stack", Opt_stack),
+ fsparam_flag_no ("user_xattr", Opt_user_xattr),
+ fsparam_flag ("inode64", Opt_inode64),
+ fsparam_flag_no ("acl", Opt_acl),
+ fsparam_flag ("usrquota", Opt_usrquota),
+ fsparam_flag ("grpquota", Opt_grpquota),
+ fsparam_enum ("coherency", Opt_coherency, ocfs2_param_coherency),
+ fsparam_u32 ("resv_level", Opt_resv_level),
+ fsparam_u32 ("dir_resv_level", Opt_dir_resv_level),
+ fsparam_flag ("journal_async_commit", Opt_journal_async_commit),
+ {}
};
#ifdef CONFIG_DEBUG_FS
@@ -600,32 +604,32 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
return (((unsigned long long)bytes) << bitshift) - trim;
}
-static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
+static int ocfs2_reconfigure(struct fs_context *fc)
{
int incompat_features;
int ret = 0;
- struct mount_options parsed_options;
+ struct mount_options *parsed_options = fc->fs_private;
+ struct super_block *sb = fc->root->d_sb;
struct ocfs2_super *osb = OCFS2_SB(sb);
u32 tmp;
sync_filesystem(sb);
- if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
- !ocfs2_check_set_options(sb, &parsed_options)) {
+ if (!ocfs2_check_set_options(sb, parsed_options)) {
ret = -EINVAL;
goto out;
}
tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE;
- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+ if ((osb->s_mount_opt & tmp) != (parsed_options->mount_opt & tmp)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n");
goto out;
}
if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) !=
- (parsed_options.mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) {
+ (parsed_options->mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot change data mode on remount\n");
goto out;
@@ -634,16 +638,16 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
/* Probably don't want this on remount; it might
* mess with other nodes */
if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) &&
- (parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) {
+ (parsed_options->mount_opt & OCFS2_MOUNT_INODE64)) {
ret = -EINVAL;
mlog(ML_ERROR, "Cannot enable inode64 on remount\n");
goto out;
}
/* We're going to/from readonly mode. */
- if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) {
+ if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
/* Disable quota accounting before remounting RO */
- if (*flags & SB_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
ret = ocfs2_susp_quotas(osb, 0);
if (ret < 0)
goto out;
@@ -657,7 +661,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
goto unlock_osb;
}
- if (*flags & SB_RDONLY) {
+ if (fc->sb_flags & SB_RDONLY) {
sb->s_flags |= SB_RDONLY;
osb->osb_flags |= OCFS2_OSB_SOFT_RO;
} else {
@@ -678,11 +682,11 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags &= ~SB_RDONLY;
osb->osb_flags &= ~OCFS2_OSB_SOFT_RO;
}
- trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags);
+ trace_ocfs2_remount(sb->s_flags, osb->osb_flags, fc->sb_flags);
unlock_osb:
spin_unlock(&osb->osb_lock);
/* Enable quota accounting after remounting RW */
- if (!ret && !(*flags & SB_RDONLY)) {
+ if (!ret && !(fc->sb_flags & SB_RDONLY)) {
if (sb_any_quota_suspended(sb))
ret = ocfs2_susp_quotas(osb, 1);
else
@@ -701,11 +705,11 @@ unlock_osb:
if (!ret) {
/* Only save off the new mount options in case of a successful
* remount. */
- osb->s_mount_opt = parsed_options.mount_opt;
- osb->s_atime_quantum = parsed_options.atime_quantum;
- osb->preferred_slot = parsed_options.slot;
- if (parsed_options.commit_interval)
- osb->osb_commit_interval = parsed_options.commit_interval;
+ osb->s_mount_opt = parsed_options->mount_opt;
+ osb->s_atime_quantum = parsed_options->atime_quantum;
+ osb->preferred_slot = parsed_options->slot;
+ if (parsed_options->commit_interval)
+ osb->osb_commit_interval = parsed_options->commit_interval;
if (!ocfs2_is_hard_readonly(osb))
ocfs2_set_journal_params(osb);
@@ -966,23 +970,18 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
}
}
-static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
+static int ocfs2_fill_super(struct super_block *sb, struct fs_context *fc)
{
struct dentry *root;
int status, sector_size;
- struct mount_options parsed_options;
+ struct mount_options *parsed_options = fc->fs_private;
struct inode *inode = NULL;
struct ocfs2_super *osb = NULL;
struct buffer_head *bh = NULL;
char nodestr[12];
struct ocfs2_blockcheck_stats stats;
- trace_ocfs2_fill_super(sb, data, silent);
-
- if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) {
- status = -EINVAL;
- goto out;
- }
+ trace_ocfs2_fill_super(sb, fc, fc->sb_flags & SB_SILENT);
/* probe for superblock */
status = ocfs2_sb_probe(sb, &bh, &sector_size, &stats);
@@ -999,24 +998,24 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
osb = OCFS2_SB(sb);
- if (!ocfs2_check_set_options(sb, &parsed_options)) {
+ if (!ocfs2_check_set_options(sb, parsed_options)) {
status = -EINVAL;
goto out_super;
}
- osb->s_mount_opt = parsed_options.mount_opt;
- osb->s_atime_quantum = parsed_options.atime_quantum;
- osb->preferred_slot = parsed_options.slot;
- osb->osb_commit_interval = parsed_options.commit_interval;
+ osb->s_mount_opt = parsed_options->mount_opt;
+ osb->s_atime_quantum = parsed_options->atime_quantum;
+ osb->preferred_slot = parsed_options->slot;
+ osb->osb_commit_interval = parsed_options->commit_interval;
- ocfs2_la_set_sizes(osb, parsed_options.localalloc_opt);
- osb->osb_resv_level = parsed_options.resv_level;
- osb->osb_dir_resv_level = parsed_options.resv_level;
- if (parsed_options.dir_resv_level == -1)
- osb->osb_dir_resv_level = parsed_options.resv_level;
+ ocfs2_la_set_sizes(osb, parsed_options->localalloc_opt);
+ osb->osb_resv_level = parsed_options->resv_level;
+ osb->osb_dir_resv_level = parsed_options->resv_level;
+ if (parsed_options->dir_resv_level == -1)
+ osb->osb_dir_resv_level = parsed_options->resv_level;
else
- osb->osb_dir_resv_level = parsed_options.dir_resv_level;
+ osb->osb_dir_resv_level = parsed_options->dir_resv_level;
- status = ocfs2_verify_userspace_stack(osb, &parsed_options);
+ status = ocfs2_verify_userspace_stack(osb, parsed_options);
if (status)
goto out_super;
@@ -1075,9 +1074,11 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root,
osb, &ocfs2_osb_debug_fops);
- if (ocfs2_meta_ecc(osb))
+ if (ocfs2_meta_ecc(osb)) {
+ ocfs2_initialize_journal_triggers(sb, osb->s_journal_triggers);
ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats,
osb->osb_debug_root);
+ }
status = ocfs2_mount_volume(sb);
if (status < 0)
@@ -1178,27 +1179,72 @@ out:
return status;
}
-static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
- int flags,
- const char *dev_name,
- void *data)
+static int ocfs2_get_tree(struct fs_context *fc)
+{
+ return get_tree_bdev(fc, ocfs2_fill_super);
+}
+
+static void ocfs2_free_fc(struct fs_context *fc)
+{
+ kfree(fc->fs_private);
+}
+
+static const struct fs_context_operations ocfs2_context_ops = {
+ .parse_param = ocfs2_parse_param,
+ .get_tree = ocfs2_get_tree,
+ .reconfigure = ocfs2_reconfigure,
+ .free = ocfs2_free_fc,
+};
+
+static int ocfs2_init_fs_context(struct fs_context *fc)
{
- return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
+ struct mount_options *mopt;
+
+ mopt = kzalloc(sizeof(struct mount_options), GFP_KERNEL);
+ if (!mopt)
+ return -EINVAL;
+
+ mopt->commit_interval = 0;
+ mopt->mount_opt = OCFS2_MOUNT_NOINTR;
+ mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
+ mopt->slot = OCFS2_INVALID_SLOT;
+ mopt->localalloc_opt = -1;
+ mopt->cluster_stack[0] = '\0';
+ mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
+ mopt->dir_resv_level = -1;
+
+ fc->fs_private = mopt;
+ fc->ops = &ocfs2_context_ops;
+
+ return 0;
}
static struct file_system_type ocfs2_fs_type = {
.owner = THIS_MODULE,
.name = "ocfs2",
- .mount = ocfs2_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
- .next = NULL
+ .next = NULL,
+ .init_fs_context = ocfs2_init_fs_context,
+ .parameters = ocfs2_param_spec,
};
MODULE_ALIAS_FS("ocfs2");
static int ocfs2_check_set_options(struct super_block *sb,
struct mount_options *options)
{
+ if (options->user_stack == 0) {
+ u32 tmp;
+
+ /* Ensure only one heartbeat mode */
+ tmp = options->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
+ OCFS2_MOUNT_HB_GLOBAL |
+ OCFS2_MOUNT_HB_NONE);
+ if (hweight32(tmp) != 1) {
+ mlog(ML_ERROR, "Invalid heartbeat mount options\n");
+ return 0;
+ }
+ }
if (options->mount_opt & OCFS2_MOUNT_USRQUOTA &&
!OCFS2_HAS_RO_COMPAT_FEATURE(sb,
OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
@@ -1230,241 +1276,142 @@ static int ocfs2_check_set_options(struct super_block *sb,
return 1;
}
-static int ocfs2_parse_options(struct super_block *sb,
- char *options,
- struct mount_options *mopt,
- int is_remount)
+static int ocfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
- int status, user_stack = 0;
- char *p;
- u32 tmp;
- int token, option;
- substring_t args[MAX_OPT_ARGS];
-
- trace_ocfs2_parse_options(is_remount, options ? options : "(none)");
-
- mopt->commit_interval = 0;
- mopt->mount_opt = OCFS2_MOUNT_NOINTR;
- mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
- mopt->slot = OCFS2_INVALID_SLOT;
- mopt->localalloc_opt = -1;
- mopt->cluster_stack[0] = '\0';
- mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
- mopt->dir_resv_level = -1;
-
- if (!options) {
- status = 1;
- goto bail;
- }
-
- while ((p = strsep(&options, ",")) != NULL) {
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_hb_local:
- mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL;
- break;
- case Opt_hb_none:
- mopt->mount_opt |= OCFS2_MOUNT_HB_NONE;
- break;
- case Opt_hb_global:
- mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL;
- break;
- case Opt_barrier:
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option)
- mopt->mount_opt |= OCFS2_MOUNT_BARRIER;
- else
- mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER;
- break;
- case Opt_intr:
- mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR;
- break;
- case Opt_nointr:
+ struct fs_parse_result result;
+ int opt;
+ struct mount_options *mopt = fc->fs_private;
+ bool is_remount = (fc->purpose & FS_CONTEXT_FOR_RECONFIGURE);
+
+ trace_ocfs2_parse_options(is_remount, param->key);
+
+ opt = fs_parse(fc, ocfs2_param_spec, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_heartbeat:
+ mopt->mount_opt |= result.uint_32;
+ break;
+ case Opt_barrier:
+ if (result.uint_32)
+ mopt->mount_opt |= OCFS2_MOUNT_BARRIER;
+ else
+ mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER;
+ break;
+ case Opt_intr:
+ if (result.negated)
mopt->mount_opt |= OCFS2_MOUNT_NOINTR;
- break;
- case Opt_err_panic:
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT;
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS;
- mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
- break;
- case Opt_err_ro:
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT;
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
- mopt->mount_opt |= OCFS2_MOUNT_ERRORS_ROFS;
- break;
- case Opt_err_cont:
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS;
- mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC;
- mopt->mount_opt |= OCFS2_MOUNT_ERRORS_CONT;
- break;
- case Opt_data_ordered:
- mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
- break;
- case Opt_data_writeback:
- mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
- break;
- case Opt_user_xattr:
- mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR;
- break;
- case Opt_nouser_xattr:
+ else
+ mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR;
+ break;
+ case Opt_errors:
+ mopt->mount_opt &= ~(OCFS2_MOUNT_ERRORS_CONT |
+ OCFS2_MOUNT_ERRORS_ROFS |
+ OCFS2_MOUNT_ERRORS_PANIC);
+ mopt->mount_opt |= result.uint_32;
+ break;
+ case Opt_data:
+ mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK;
+ mopt->mount_opt |= result.uint_32;
+ break;
+ case Opt_user_xattr:
+ if (result.negated)
mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR;
- break;
- case Opt_atime_quantum:
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option >= 0)
- mopt->atime_quantum = option;
- break;
- case Opt_slot:
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option)
- mopt->slot = (u16)option;
- break;
- case Opt_commit:
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option < 0)
- return 0;
- if (option == 0)
- option = JBD2_DEFAULT_MAX_COMMIT_AGE;
- mopt->commit_interval = HZ * option;
- break;
- case Opt_localalloc:
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option >= 0)
- mopt->localalloc_opt = option;
- break;
- case Opt_localflocks:
- /*
- * Changing this during remount could race
- * flock() requests, or "unbalance" existing
- * ones (e.g., a lock is taken in one mode but
- * dropped in the other). If users care enough
- * to flip locking modes during remount, we
- * could add a "local" flag to individual
- * flock structures for proper tracking of
- * state.
- */
- if (!is_remount)
- mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS;
- break;
- case Opt_stack:
- /* Check both that the option we were passed
- * is of the right length and that it is a proper
- * string of the right length.
- */
- if (((args[0].to - args[0].from) !=
- OCFS2_STACK_LABEL_LEN) ||
- (strnlen(args[0].from,
- OCFS2_STACK_LABEL_LEN) !=
- OCFS2_STACK_LABEL_LEN)) {
- mlog(ML_ERROR,
- "Invalid cluster_stack option\n");
- status = 0;
- goto bail;
- }
- memcpy(mopt->cluster_stack, args[0].from,
- OCFS2_STACK_LABEL_LEN);
- mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
- /*
- * Open code the memcmp here as we don't have
- * an osb to pass to
- * ocfs2_userspace_stack().
- */
- if (memcmp(mopt->cluster_stack,
- OCFS2_CLASSIC_CLUSTER_STACK,
- OCFS2_STACK_LABEL_LEN))
- user_stack = 1;
- break;
- case Opt_inode64:
- mopt->mount_opt |= OCFS2_MOUNT_INODE64;
- break;
- case Opt_usrquota:
- mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
- break;
- case Opt_grpquota:
- mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
- break;
- case Opt_coherency_buffered:
- mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED;
- break;
- case Opt_coherency_full:
- mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED;
- break;
- case Opt_acl:
- mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
- mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
- break;
- case Opt_noacl:
+ else
+ mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR;
+ break;
+ case Opt_atime_quantum:
+ mopt->atime_quantum = result.uint_32;
+ break;
+ case Opt_slot:
+ if (result.uint_32)
+ mopt->slot = (u16)result.uint_32;
+ break;
+ case Opt_commit:
+ if (result.uint_32 == 0)
+ mopt->commit_interval = HZ * JBD2_DEFAULT_MAX_COMMIT_AGE;
+ else
+ mopt->commit_interval = HZ * result.uint_32;
+ break;
+ case Opt_localalloc:
+ if (result.int_32 >= 0)
+ mopt->localalloc_opt = result.int_32;
+ break;
+ case Opt_localflocks:
+ /*
+ * Changing this during remount could race flock() requests, or
+ * "unbalance" existing ones (e.g., a lock is taken in one mode
+ * but dropped in the other). If users care enough to flip
+ * locking modes during remount, we could add a "local" flag to
+ * individual flock structures for proper tracking of state.
+ */
+ if (!is_remount)
+ mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS;
+ break;
+ case Opt_stack:
+ /* Check both that the option we were passed is of the right
+ * length and that it is a proper string of the right length.
+ */
+ if (strlen(param->string) != OCFS2_STACK_LABEL_LEN) {
+ mlog(ML_ERROR, "Invalid cluster_stack option\n");
+ return -EINVAL;
+ }
+ memcpy(mopt->cluster_stack, param->string, OCFS2_STACK_LABEL_LEN);
+ mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+ /*
+ * Open code the memcmp here as we don't have an osb to pass
+ * to ocfs2_userspace_stack().
+ */
+ if (memcmp(mopt->cluster_stack,
+ OCFS2_CLASSIC_CLUSTER_STACK,
+ OCFS2_STACK_LABEL_LEN))
+ mopt->user_stack = 1;
+ break;
+ case Opt_inode64:
+ mopt->mount_opt |= OCFS2_MOUNT_INODE64;
+ break;
+ case Opt_usrquota:
+ mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
+ break;
+ case Opt_grpquota:
+ mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
+ break;
+ case Opt_coherency:
+ mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED;
+ mopt->mount_opt |= result.uint_32;
+ break;
+ case Opt_acl:
+ if (result.negated) {
mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
+ } else {
+ mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
+ mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
+ }
+ break;
+ case Opt_resv_level:
+ if (is_remount)
break;
- case Opt_resv_level:
- if (is_remount)
- break;
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option >= OCFS2_MIN_RESV_LEVEL &&
- option < OCFS2_MAX_RESV_LEVEL)
- mopt->resv_level = option;
- break;
- case Opt_dir_resv_level:
- if (is_remount)
- break;
- if (match_int(&args[0], &option)) {
- status = 0;
- goto bail;
- }
- if (option >= OCFS2_MIN_RESV_LEVEL &&
- option < OCFS2_MAX_RESV_LEVEL)
- mopt->dir_resv_level = option;
- break;
- case Opt_journal_async_commit:
- mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
+ if (result.uint_32 >= OCFS2_MIN_RESV_LEVEL &&
+ result.uint_32 < OCFS2_MAX_RESV_LEVEL)
+ mopt->resv_level = result.uint_32;
+ break;
+ case Opt_dir_resv_level:
+ if (is_remount)
break;
- default:
- mlog(ML_ERROR,
- "Unrecognized mount option \"%s\" "
- "or missing value\n", p);
- status = 0;
- goto bail;
- }
- }
-
- if (user_stack == 0) {
- /* Ensure only one heartbeat mode */
- tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL |
- OCFS2_MOUNT_HB_GLOBAL |
- OCFS2_MOUNT_HB_NONE);
- if (hweight32(tmp) != 1) {
- mlog(ML_ERROR, "Invalid heartbeat mount options\n");
- status = 0;
- goto bail;
- }
+ if (result.uint_32 >= OCFS2_MIN_RESV_LEVEL &&
+ result.uint_32 < OCFS2_MAX_RESV_LEVEL)
+ mopt->dir_resv_level = result.uint_32;
+ break;
+ case Opt_journal_async_commit:
+ mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
+ break;
+ default:
+ return -EINVAL;
}
- status = 1;
-
-bail:
- return status;
+ return 0;
}
static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
@@ -1569,15 +1516,13 @@ static int __init ocfs2_init(void)
ocfs2_set_locking_protocol();
- status = register_quota_format(&ocfs2_quota_format);
- if (status < 0)
- goto out3;
+ register_quota_format(&ocfs2_quota_format);
+
status = register_filesystem(&ocfs2_fs_type);
if (!status)
return 0;
unregister_quota_format(&ocfs2_quota_format);
-out3:
debugfs_remove(ocfs2_debugfs_root);
ocfs2_free_mem_caches();
out2:
@@ -1858,7 +1803,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
osb = OCFS2_SB(sb);
BUG_ON(!osb);
- /* Remove file check sysfs related directores/files,
+ /* Remove file check sysfs related directories/files,
* and wait for the pending file check operations */
ocfs2_filecheck_remove_sysfs(osb);
@@ -1867,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
/* Orphan scan should be stopped as early as possible */
ocfs2_orphan_scan_stop(osb);
+ /* Stop quota recovery so that we can disable quotas */
+ ocfs2_recovery_disable_quota(osb);
+
ocfs2_disable_quotas(osb);
/* All dquots should be freed by now */
@@ -2319,6 +2267,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
struct ocfs2_blockcheck_stats *stats)
{
int status = -EAGAIN;
+ u32 blksz_bits;
if (memcmp(di->i_signature, OCFS2_SUPER_BLOCK_SIGNATURE,
strlen(OCFS2_SUPER_BLOCK_SIGNATURE)) == 0) {
@@ -2333,11 +2282,15 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
goto out;
}
status = -EINVAL;
- if ((1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits)) != blksz) {
+ /* Acceptable block sizes are 512 bytes, 1K, 2K and 4K. */
+ blksz_bits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
+ if (blksz_bits < 9 || blksz_bits > 12) {
+ mlog(ML_ERROR, "found superblock with incorrect block "
+ "size bits: found %u, should be 9, 10, 11, or 12\n",
+ blksz_bits);
+ } else if ((1 << blksz_bits) != blksz) {
mlog(ML_ERROR, "found superblock with incorrect block "
- "size: found %u, should be %u\n",
- 1 << le32_to_cpu(di->id2.i_super.s_blocksize_bits),
- blksz);
+ "size: found %u, should be %u\n", 1 << blksz_bits, blksz);
} else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) !=
OCFS2_MAJOR_REV_LEVEL ||
le16_to_cpu(di->id2.i_super.s_minor_rev_level) !=
@@ -2355,8 +2308,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
(unsigned long long)bh->b_blocknr);
} else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 ||
le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) {
- mlog(ML_ERROR, "bad cluster size found: %u\n",
- 1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits));
+ mlog(ML_ERROR, "bad cluster size bit found: %u\n",
+ le32_to_cpu(di->id2.i_super.s_clustersize_bits));
} else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) {
mlog(ML_ERROR, "bad root_blkno: 0\n");
} else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) {
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index d4c5fdcfa1e4..ad8be3300b49 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -54,31 +54,27 @@
static int ocfs2_fast_symlink_read_folio(struct file *f, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = folio->mapping->host;
struct buffer_head *bh = NULL;
int status = ocfs2_read_inode_block(inode, &bh);
struct ocfs2_dinode *fe;
const char *link;
- void *kaddr;
size_t len;
if (status < 0) {
mlog_errno(status);
- return status;
+ goto out;
}
fe = (struct ocfs2_dinode *) bh->b_data;
link = (char *) fe->id2.i_symlink;
/* will be less than a page size */
len = strnlen(link, ocfs2_fast_symlink_chars(inode->i_sb));
- kaddr = kmap_atomic(page);
- memcpy(kaddr, link, len + 1);
- kunmap_atomic(kaddr);
- SetPageUptodate(page);
- unlock_page(page);
+ memcpy_to_folio(folio, 0, link, len + 1);
+out:
+ folio_end_read(folio, status == 0);
brelse(bh);
- return 0;
+ return status;
}
const struct address_space_operations ocfs2_fast_symlink_aops = {
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3b81213ed7b8..d70a20d29e3e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -648,7 +648,7 @@ int ocfs2_calc_xattr_init(struct inode *dir,
* 256(name) + 80(value) + 16(entry) = 352 bytes,
* The max space of acl xattr taken inline is
* 80(value) + 16(entry) * 2(if directory) = 192 bytes,
- * when blocksize = 512, may reserve one more cluser for
+ * when blocksize = 512, may reserve one more cluster for
* xattr bucket, otherwise reserve one metadata block
* for them is ok.
* If this is a new directory with inline data,
@@ -1062,13 +1062,13 @@ ssize_t ocfs2_listxattr(struct dentry *dentry,
return i_ret + b_ret;
}
-static int ocfs2_xattr_find_entry(int name_index,
+static int ocfs2_xattr_find_entry(struct inode *inode, int name_index,
const char *name,
struct ocfs2_xattr_search *xs)
{
struct ocfs2_xattr_entry *entry;
size_t name_len;
- int i, cmp = 1;
+ int i, name_offset, cmp = 1;
if (name == NULL)
return -EINVAL;
@@ -1076,13 +1076,22 @@ static int ocfs2_xattr_find_entry(int name_index,
name_len = strlen(name);
entry = xs->here;
for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
+ if ((void *)entry >= xs->end) {
+ ocfs2_error(inode->i_sb, "corrupted xattr entries");
+ return -EFSCORRUPTED;
+ }
cmp = name_index - ocfs2_xattr_get_type(entry);
if (!cmp)
cmp = name_len - entry->xe_name_len;
- if (!cmp)
- cmp = memcmp(name, (xs->base +
- le16_to_cpu(entry->xe_name_offset)),
- name_len);
+ if (!cmp) {
+ name_offset = le16_to_cpu(entry->xe_name_offset);
+ if ((xs->base + name_offset + name_len) > xs->end) {
+ ocfs2_error(inode->i_sb,
+ "corrupted xattr entries");
+ return -EFSCORRUPTED;
+ }
+ cmp = memcmp(name, (xs->base + name_offset), name_len);
+ }
if (cmp == 0)
break;
entry += 1;
@@ -1166,7 +1175,7 @@ static int ocfs2_xattr_ibody_get(struct inode *inode,
xs->base = (void *)xs->header;
xs->here = xs->header->xh_entries;
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
if (ret)
return ret;
size = le64_to_cpu(xs->here->xe_value_size);
@@ -2027,8 +2036,7 @@ static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
rc = 0;
ocfs2_xa_cleanup_value_truncate(loc, "removing",
orig_clusters);
- if (rc)
- goto out;
+ goto out;
}
}
@@ -2698,7 +2706,7 @@ static int ocfs2_xattr_ibody_find(struct inode *inode,
/* Find the named attribute. */
if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
if (ret && ret != -ENODATA)
return ret;
xs->not_found = ret;
@@ -2833,7 +2841,7 @@ static int ocfs2_xattr_block_find(struct inode *inode,
xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
xs->here = xs->header->xh_entries;
- ret = ocfs2_xattr_find_entry(name_index, name, xs);
+ ret = ocfs2_xattr_find_entry(inode, name_index, name, xs);
} else
ret = ocfs2_xattr_index_block_find(inode, blk_bh,
name_index,
@@ -4158,15 +4166,6 @@ static int cmp_xe(const void *a, const void *b)
return 0;
}
-static void swap_xe(void *a, void *b, int size)
-{
- struct ocfs2_xattr_entry *l = a, *r = b, tmp;
-
- tmp = *l;
- memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
- memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
-}
-
/*
* When the ocfs2_xattr_block is filled up, new bucket will be created
* and all the xattr entries will be moved to the new bucket.
@@ -4232,7 +4231,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change);
sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
- cmp_xe, swap_xe);
+ cmp_xe, NULL);
}
/*
@@ -4372,7 +4371,7 @@ static int cmp_xe_offset(const void *a, const void *b)
/*
* defrag a xattr bucket if we find that the bucket has some
- * holes beteen name/value pairs.
+ * holes between name/value pairs.
* We will move all the name/value pairs to the end of the bucket
* so that we can spare some space for insertion.
*/
@@ -4427,7 +4426,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
*/
sort(entries, le16_to_cpu(xh->xh_count),
sizeof(struct ocfs2_xattr_entry),
- cmp_xe_offset, swap_xe);
+ cmp_xe_offset, NULL);
/* Move all name/values to the end of the bucket. */
xe = xh->xh_entries;
@@ -4469,7 +4468,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode,
/* sort the entries by their name_hash. */
sort(entries, le16_to_cpu(xh->xh_count),
sizeof(struct ocfs2_xattr_entry),
- cmp_xe, swap_xe);
+ cmp_xe, NULL);
buf = bucket_buf;
for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
@@ -5012,7 +5011,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
* 2. If cluster_size == bucket_size:
* a) If the previous extent rec has more than one cluster and the insert
* place isn't in the last cluster, copy the entire last cluster to the
- * new one. This time, we don't need to upate the first_bh and header_bh
+ * new one. This time, we don't need to update the first_bh and header_bh
* since they will not be moved into the new cluster.
* b) Otherwise, move the bottom half of the xattrs in the last cluster into
* the new one. And we set the extend flag to zero if the insert place is
@@ -6190,7 +6189,7 @@ struct ocfs2_xattr_reflink {
/*
* Given a xattr header and xe offset,
* return the proper xv and the corresponding bh.
- * xattr in inode, block and xattr tree have different implementaions.
+ * xattr in inode, block and xattr tree have different implementations.
*/
typedef int (get_xattr_value_root)(struct super_block *sb,
struct buffer_head *bh,
@@ -6270,7 +6269,7 @@ static int ocfs2_get_xattr_value_root(struct super_block *sb,
}
/*
- * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
+ * Lock the meta_ac and calculate how much credits we need for reflink xattrs.
* It is only used for inline xattr and xattr block.
*/
static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
@@ -6511,16 +6510,7 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
}
new_oi = OCFS2_I(args->new_inode);
- /*
- * Adjust extent record count to reserve space for extended attribute.
- * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
- */
- if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
- !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
- struct ocfs2_extent_list *el = &new_di->id2.i_list;
- le16_add_cpu(&el->l_count, -(inline_size /
- sizeof(struct ocfs2_extent_rec)));
- }
+
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);