summaryrefslogtreecommitdiff
path: root/fs/ocfs2/journal.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/journal.c')
-rw-r--r--fs/ocfs2/journal.c330
1 files changed, 205 insertions, 125 deletions
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 3fb98b4569a2..85239807dec7 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -15,6 +15,7 @@
#include <linux/time.h>
#include <linux/random.h>
#include <linux/delay.h>
+#include <linux/writeback.h>
#include <cluster/masklog.h>
@@ -89,7 +90,7 @@ enum ocfs2_replay_state {
struct ocfs2_replay_map {
unsigned int rm_slots;
enum ocfs2_replay_state rm_state;
- unsigned char rm_replay_slots[];
+ unsigned char rm_replay_slots[] __counted_by(rm_slots);
};
static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
@@ -113,9 +114,9 @@ int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
if (osb->replay_map)
return 0;
- replay_map = kzalloc(sizeof(struct ocfs2_replay_map) +
- (osb->max_slots * sizeof(char)), GFP_KERNEL);
-
+ replay_map = kzalloc(struct_size(replay_map, rm_replay_slots,
+ osb->max_slots),
+ GFP_KERNEL);
if (!replay_map) {
mlog_errno(-ENOMEM);
return -ENOMEM;
@@ -173,50 +174,69 @@ int ocfs2_recovery_init(struct ocfs2_super *osb)
struct ocfs2_recovery_map *rm;
mutex_init(&osb->recovery_lock);
- osb->disable_recovery = 0;
+ osb->recovery_state = OCFS2_REC_ENABLED;
osb->recovery_thread_task = NULL;
init_waitqueue_head(&osb->recovery_event);
- rm = kzalloc(sizeof(struct ocfs2_recovery_map) +
- osb->max_slots * sizeof(unsigned int),
+ rm = kzalloc(struct_size(rm, rm_entries, osb->max_slots),
GFP_KERNEL);
if (!rm) {
mlog_errno(-ENOMEM);
return -ENOMEM;
}
- rm->rm_entries = (unsigned int *)((char *)rm +
- sizeof(struct ocfs2_recovery_map));
osb->recovery_map = rm;
return 0;
}
-/* we can't grab the goofy sem lock from inside wait_event, so we use
- * memory barriers to make sure that we'll see the null task before
- * being woken up */
static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
{
- mb();
return osb->recovery_thread_task != NULL;
}
-void ocfs2_recovery_exit(struct ocfs2_super *osb)
+static void ocfs2_recovery_disable(struct ocfs2_super *osb,
+ enum ocfs2_recovery_state state)
{
- struct ocfs2_recovery_map *rm;
-
- /* disable any new recovery threads and wait for any currently
- * running ones to exit. Do this before setting the vol_state. */
mutex_lock(&osb->recovery_lock);
- osb->disable_recovery = 1;
+ /*
+ * If recovery thread is not running, we can directly transition to
+ * final state.
+ */
+ if (!ocfs2_recovery_thread_running(osb)) {
+ osb->recovery_state = state + 1;
+ goto out_lock;
+ }
+ osb->recovery_state = state;
+ /* Wait for recovery thread to acknowledge state transition */
+ wait_event_cmd(osb->recovery_event,
+ !ocfs2_recovery_thread_running(osb) ||
+ osb->recovery_state >= state + 1,
+ mutex_unlock(&osb->recovery_lock),
+ mutex_lock(&osb->recovery_lock));
+out_lock:
mutex_unlock(&osb->recovery_lock);
- wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
- /* At this point, we know that no more recovery threads can be
- * launched, so wait for any recovery completion work to
- * complete. */
+ /*
+ * At this point we know that no more recovery work can be queued so
+ * wait for any recovery completion work to complete.
+ */
if (osb->ocfs2_wq)
flush_workqueue(osb->ocfs2_wq);
+}
+
+void ocfs2_recovery_disable_quota(struct ocfs2_super *osb)
+{
+ ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE);
+}
+
+void ocfs2_recovery_exit(struct ocfs2_super *osb)
+{
+ struct ocfs2_recovery_map *rm;
+
+ /* disable any new recovery threads and wait for any currently
+ * running ones to exit. Do this before setting the vol_state. */
+ ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE);
/*
* Now that recovery is shut down, and the osb is about to be
@@ -448,6 +468,23 @@ bail:
}
/*
+ * Make sure handle has at least 'nblocks' credits available. If it does not
+ * have that many credits available, we will try to extend the handle to have
+ * enough credits. If that fails, we will restart transaction to have enough
+ * credits. Similar notes regarding data consistency and locking implications
+ * as for ocfs2_extend_trans() apply here.
+ */
+int ocfs2_assure_trans_credits(handle_t *handle, int nblocks)
+{
+ int old_nblks = jbd2_handle_buffer_credits(handle);
+
+ trace_ocfs2_assure_trans_credits(old_nblks);
+ if (old_nblks >= nblocks)
+ return 0;
+ return ocfs2_extend_trans(handle, nblocks - old_nblks);
+}
+
+/*
* If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA.
* If that fails, restart the transaction & regain write access for the
* buffer head which is used for metadata modifications.
@@ -481,12 +518,6 @@ bail:
return status;
}
-
-struct ocfs2_triggers {
- struct jbd2_buffer_trigger_type ot_triggers;
- int ot_offset;
-};
-
static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers)
{
return container_of(triggers, struct ocfs2_triggers, ot_triggers);
@@ -550,85 +581,76 @@ static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers,
static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
struct buffer_head *bh)
{
+ struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers);
+
mlog(ML_ERROR,
"ocfs2_abort_trigger called by JBD2. bh = 0x%lx, "
"bh->b_blocknr = %llu\n",
(unsigned long)bh,
(unsigned long long)bh->b_blocknr);
- ocfs2_error(bh->b_bdev->bd_super,
+ ocfs2_error(ot->sb,
"JBD2 has aborted our journal, ocfs2 cannot continue\n");
}
-static struct ocfs2_triggers di_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dinode, i_check),
-};
-
-static struct ocfs2_triggers eb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
-};
-
-static struct ocfs2_triggers rb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
-};
-
-static struct ocfs2_triggers gd_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_group_desc, bg_check),
-};
-
-static struct ocfs2_triggers db_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_db_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
-};
+static void ocfs2_setup_csum_triggers(struct super_block *sb,
+ enum ocfs2_journal_trigger_type type,
+ struct ocfs2_triggers *ot)
+{
+ BUG_ON(type >= OCFS2_JOURNAL_TRIGGER_COUNT);
-static struct ocfs2_triggers xb_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check),
-};
+ switch (type) {
+ case OCFS2_JTR_DI:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dinode, i_check);
+ break;
+ case OCFS2_JTR_EB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_extent_block, h_check);
+ break;
+ case OCFS2_JTR_RB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_refcount_block, rf_check);
+ break;
+ case OCFS2_JTR_GD:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_group_desc, bg_check);
+ break;
+ case OCFS2_JTR_DB:
+ ot->ot_triggers.t_frozen = ocfs2_db_frozen_trigger;
+ break;
+ case OCFS2_JTR_XB:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_xattr_block, xb_check);
+ break;
+ case OCFS2_JTR_DQ:
+ ot->ot_triggers.t_frozen = ocfs2_dq_frozen_trigger;
+ break;
+ case OCFS2_JTR_DR:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check);
+ break;
+ case OCFS2_JTR_DL:
+ ot->ot_triggers.t_frozen = ocfs2_frozen_trigger;
+ ot->ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check);
+ break;
+ case OCFS2_JTR_NONE:
+ /* To make compiler happy... */
+ return;
+ }
-static struct ocfs2_triggers dq_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_dq_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
-};
+ ot->ot_triggers.t_abort = ocfs2_abort_trigger;
+ ot->sb = sb;
+}
-static struct ocfs2_triggers dr_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check),
-};
+void ocfs2_initialize_journal_triggers(struct super_block *sb,
+ struct ocfs2_triggers triggers[])
+{
+ enum ocfs2_journal_trigger_type type;
-static struct ocfs2_triggers dl_triggers = {
- .ot_triggers = {
- .t_frozen = ocfs2_frozen_trigger,
- .t_abort = ocfs2_abort_trigger,
- },
- .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check),
-};
+ for (type = OCFS2_JTR_DI; type < OCFS2_JOURNAL_TRIGGER_COUNT; type++)
+ ocfs2_setup_csum_triggers(sb, type, &triggers[type]);
+}
static int __ocfs2_journal_access(handle_t *handle,
struct ocfs2_caching_info *ci,
@@ -710,56 +732,91 @@ static int __ocfs2_journal_access(handle_t *handle,
int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DI],
+ type);
}
int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_EB],
+ type);
}
int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_RB],
type);
}
int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_GD],
+ type);
}
int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DB],
+ type);
}
int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_XB],
+ type);
}
int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DQ],
+ type);
}
int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DR],
+ type);
}
int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
struct buffer_head *bh, int type)
{
- return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
+ struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
+
+ return __ocfs2_journal_access(handle, ci, bh,
+ &osb->s_journal_triggers[OCFS2_JTR_DL],
+ type);
}
int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
@@ -779,14 +836,16 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
mlog_errno(status);
if (!is_handle_aborted(handle)) {
journal_t *journal = handle->h_transaction->t_journal;
- struct super_block *sb = bh->b_bdev->bd_super;
- mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
- "Aborting transaction and journal.\n");
+ mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed: "
+ "handle type %u started at line %u, credits %u/%u "
+ "errcode %d. Aborting transaction and journal.\n",
+ handle->h_type, handle->h_line_no,
+ handle->h_requested_credits,
+ jbd2_handle_buffer_credits(handle), status);
handle->h_err = status;
jbd2_journal_abort_handle(handle);
jbd2_journal_abort(journal, status);
- ocfs2_abort(sb, "Journal already aborted.\n");
}
}
}
@@ -841,6 +900,12 @@ bail:
return status;
}
+static int ocfs2_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
+{
+ return filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
+ jinode->i_dirty_start, jinode->i_dirty_end);
+}
+
int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
{
int status = -1;
@@ -897,9 +962,9 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
/* call the kernels journal init function now */
j_journal = jbd2_journal_init_inode(inode);
- if (j_journal == NULL) {
+ if (IS_ERR(j_journal)) {
mlog(ML_ERROR, "Linux journal layer error\n");
- status = -EINVAL;
+ status = PTR_ERR(j_journal);
goto done;
}
@@ -910,7 +975,7 @@ int ocfs2_journal_init(struct ocfs2_super *osb, int *dirty)
journal->j_journal = j_journal;
journal->j_journal->j_submit_inode_data_buffers =
- jbd2_journal_submit_inode_data_buffers;
+ ocfs2_journal_submit_inode_data_buffers;
journal->j_journal->j_finish_inode_data_buffers =
jbd2_journal_finish_inode_data_buffers;
journal->j_inode = inode;
@@ -1005,7 +1070,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
if (!igrab(inode))
BUG();
- num_running_trans = atomic_read(&(osb->journal->j_num_trans));
+ num_running_trans = atomic_read(&(journal->j_num_trans));
trace_ocfs2_journal_shutdown(num_running_trans);
/* Do a commit_cache here. It will flush our journal, *and*
@@ -1024,9 +1089,10 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
osb->commit_task = NULL;
}
- BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
+ BUG_ON(atomic_read(&(journal->j_num_trans)) != 0);
- if (ocfs2_mount_local(osb)) {
+ if (ocfs2_mount_local(osb) &&
+ (journal->j_journal->j_flags & JBD2_LOADED)) {
jbd2_journal_lock_updates(journal->j_journal);
status = jbd2_journal_flush(journal->j_journal, 0);
jbd2_journal_unlock_updates(journal->j_journal);
@@ -1198,7 +1264,7 @@ static int ocfs2_force_read_journal(struct inode *inode)
}
for (i = 0; i < p_blocks; i++, p_blkno++) {
- bh = __find_get_block(osb->sb->s_bdev, p_blkno,
+ bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno,
osb->sb->s_blocksize);
/* block not cached. */
if (!bh)
@@ -1421,6 +1487,18 @@ static int __ocfs2_recovery_thread(void *arg)
}
}
restart:
+ if (quota_enabled) {
+ mutex_lock(&osb->recovery_lock);
+ /* Confirm that recovery thread will no longer recover quotas */
+ if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) {
+ osb->recovery_state = OCFS2_REC_QUOTA_DISABLED;
+ wake_up(&osb->recovery_event);
+ }
+ if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED)
+ quota_enabled = 0;
+ mutex_unlock(&osb->recovery_lock);
+ }
+
status = ocfs2_super_lock(osb, 1);
if (status < 0) {
mlog_errno(status);
@@ -1518,27 +1596,29 @@ bail:
ocfs2_free_replay_slots(osb);
osb->recovery_thread_task = NULL;
- mb(); /* sync with ocfs2_recovery_thread_running */
+ if (osb->recovery_state == OCFS2_REC_WANT_DISABLE)
+ osb->recovery_state = OCFS2_REC_DISABLED;
wake_up(&osb->recovery_event);
mutex_unlock(&osb->recovery_lock);
- if (quota_enabled)
- kfree(rm_quota);
+ kfree(rm_quota);
return status;
}
void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num)
{
+ int was_set = -1;
+
mutex_lock(&osb->recovery_lock);
+ if (osb->recovery_state < OCFS2_REC_WANT_DISABLE)
+ was_set = ocfs2_recovery_map_set(osb, node_num);
trace_ocfs2_recovery_thread(node_num, osb->node_num,
- osb->disable_recovery, osb->recovery_thread_task,
- osb->disable_recovery ?
- -1 : ocfs2_recovery_map_set(osb, node_num));
+ osb->recovery_state, osb->recovery_thread_task, was_set);
- if (osb->disable_recovery)
+ if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE)
goto out;
if (osb->recovery_thread_task)
@@ -1673,9 +1753,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
}
journal = jbd2_journal_init_inode(inode);
- if (journal == NULL) {
+ if (IS_ERR(journal)) {
mlog(ML_ERROR, "Linux journal layer error\n");
- status = -EIO;
+ status = PTR_ERR(journal);
goto done;
}
@@ -1905,7 +1985,7 @@ bail:
/*
* Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some
- * randomness to the timeout to minimize multple nodes firing the timer at the
+ * randomness to the timeout to minimize multiple nodes firing the timer at the
* same time.
*/
static inline unsigned long ocfs2_orphan_scan_timeout(void)