Diffstat (limited to 'fs/gfs2/lock_dlm.c')
-rw-r--r--   fs/gfs2/lock_dlm.c   367
1 file changed, 244 insertions(+), 123 deletions(-)
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index c8423d6de6c3..b8d249925395 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1,59 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
  * Copyright 2004-2011 Red Hat, Inc.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/fs.h>
 #include <linux/dlm.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/delay.h>
 #include <linux/gfs2_ondisk.h>
+#include <linux/sched/signal.h>
 
 #include "incore.h"
-#include "glock.h"
 #include "util.h"
 #include "sys.h"
 #include "trace_gfs2.h"
 
-extern struct workqueue_struct *gfs2_control_wq;
-
 /**
  * gfs2_update_stats - Update time based stats
- * @mv: Pointer to mean/variance structure to update
+ * @s: The stats to update (local or global)
+ * @index: The index inside @s
  * @sample: New data to include
- *
- * @delta is the difference between the current rtt sample and the
- * running average srtt. We add 1/8 of that to the srtt in order to
- * update the current srtt estimate. The varience estimate is a bit
- * more complicated. We subtract the abs value of the @delta from
- * the current variance estimate and add 1/4 of that to the running
- * total.
- *
- * Note that the index points at the array entry containing the smoothed
- * mean value, and the variance is always in the following entry
- *
- * Reference: TCP/IP Illustrated, vol 2, p. 831,832
- * All times are in units of integer nanoseconds. Unlike the TCP/IP case,
- * they are not scaled fixed point.
  */
-
 static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
				      s64 sample)
 {
+	/*
+	 * @delta is the difference between the current rtt sample and the
+	 * running average srtt. We add 1/8 of that to the srtt in order to
+	 * update the current srtt estimate. The variance estimate is a bit
+	 * more complicated. We subtract the current variance estimate from
+	 * the abs value of the @delta and add 1/4 of that to the running
+	 * total. That's equivalent to 3/4 of the current variance
+	 * estimate plus 1/4 of the abs of @delta.
+	 *
+	 * Note that the index points at the array entry containing the
+	 * smoothed mean value, and the variance is always in the following
+	 * entry
+	 *
+	 * Reference: TCP/IP Illustrated, vol 2, p. 831,832
+	 * All times are in units of integer nanoseconds. Unlike the TCP/IP
+	 * case, they are not scaled fixed point.
+	 */
+
 	s64 delta = sample - s->stats[index];
 	s->stats[index] += (delta >> 3);
 	index++;
-	s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2);
+	s->stats[index] += (s64)(abs(delta) - s->stats[index]) >> 2;
 }
 
 /**
  * gfs2_update_reply_times - Update locking statistics
  * @gl: The glock to update
+ * @blocking: The operation may have been blocking
  *
  * This assumes that gl->gl_dstamp has been set earlier.
  *
@@ -68,17 +70,17 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
  * TRY_1CB flags are set are classified as non-blocking. All
  * other DLM requests are counted as (potentially) blocking.
  */
-static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
+static inline void gfs2_update_reply_times(struct gfs2_glock *gl,
+					   bool blocking)
 {
	struct gfs2_pcpu_lkstats *lks;
	const unsigned gltype = gl->gl_name.ln_type;
-	unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
-			 GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
+	unsigned index = blocking ? GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
	s64 rtt;
 
	preempt_disable();
	rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
-	lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
+	lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
	gfs2_update_stats(&gl->gl_stats, index, rtt);		/* Local */
	gfs2_update_stats(&lks->lkstats[gltype], index, rtt);	/* Global */
	preempt_enable();
@@ -106,7 +108,7 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl)
	dstamp = gl->gl_dstamp;
	gl->gl_dstamp = ktime_get_real();
	irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
-	lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
+	lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
	gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt);		/* Local */
	gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt);	/* Global */
	preempt_enable();
@@ -115,9 +117,18 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl)
 
 static void gdlm_ast(void *arg)
 {
	struct gfs2_glock *gl = arg;
-	unsigned ret = gl->gl_state;
+	bool blocking;
+	unsigned ret;
+
+	blocking = test_bit(GLF_BLOCKING, &gl->gl_flags);
+	gfs2_update_reply_times(gl, blocking);
+	clear_bit(GLF_BLOCKING, &gl->gl_flags);
+
+	/* If the glock is dead, we only react to a dlm_unlock() reply. */
+	if (__lockref_is_dead(&gl->gl_lockref) &&
+	    gl->gl_lksb.sb_status != -DLM_EUNLOCK)
+		return;
 
-	gfs2_update_reply_times(gl);
	BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
 
	if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr)
@@ -128,13 +139,16 @@ static void gdlm_ast(void *arg)
		gfs2_glock_free(gl);
		return;
	case -DLM_ECANCEL:		/* Cancel while getting lock */
-		ret |= LM_OUT_CANCELED;
+		ret = LM_OUT_CANCELED;
		goto out;
	case -EAGAIN:			/* Try lock fails */
+		ret = LM_OUT_TRY_AGAIN;
+		goto out;
	case -EDEADLK:			/* Deadlock detected */
+		ret = LM_OUT_DEADLOCK;
		goto out;
	case -ETIMEDOUT:		/* Canceled due to timeout */
-		ret |= LM_OUT_ERROR;
+		ret = LM_OUT_ERROR;
		goto out;
	case 0: /* Success */
		break;
@@ -143,20 +157,22 @@ static void gdlm_ast(void *arg)
	}
 
	ret = gl->gl_req;
-	if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) {
-		if (gl->gl_req == LM_ST_SHARED)
-			ret = LM_ST_DEFERRED;
-		else if (gl->gl_req == LM_ST_DEFERRED)
-			ret = LM_ST_SHARED;
-		else
-			BUG();
-	}
 
-	set_bit(GLF_INITIAL, &gl->gl_flags);
+	/*
+	 * The GLF_INITIAL flag is initially set for new glocks. Upon the
+	 * first successful new (non-conversion) request, we clear this flag to
+	 * indicate that a DLM lock exists and that gl->gl_lksb.sb_lkid is the
+	 * identifier to use for identifying it.
+	 *
+	 * Any failed initial requests do not create a DLM lock, so we ignore
+	 * the gl->gl_lksb.sb_lkid values that come with such requests.
+	 */
+
+	clear_bit(GLF_INITIAL, &gl->gl_flags);
	gfs2_glock_complete(gl, ret);
	return;
 out:
-	if (!test_bit(GLF_INITIAL, &gl->gl_flags))
+	if (test_bit(GLF_INITIAL, &gl->gl_flags))
		gl->gl_lksb.sb_lkid = 0;
	gfs2_glock_complete(gl, ret);
 }
@@ -165,6 +181,9 @@ static void gdlm_bast(void *arg, int mode)
 {
	struct gfs2_glock *gl = arg;
 
+	if (__lockref_is_dead(&gl->gl_lockref))
+		return;
+
	switch (mode) {
	case DLM_LOCK_EX:
		gfs2_glock_cb(gl, LM_ST_UNLOCKED);
@@ -176,14 +195,14 @@ static void gdlm_bast(void *arg, int mode)
		gfs2_glock_cb(gl, LM_ST_SHARED);
		break;
	default:
-		printk(KERN_ERR "unknown bast mode %d", mode);
+		fs_err(gl->gl_name.ln_sbd, "unknown bast mode %d\n", mode);
		BUG();
	}
 }
 
 /* convert gfs lock-state to dlm lock-mode */
 
-static int make_mode(const unsigned int lmstate)
+static int make_mode(struct gfs2_sbd *sdp, const unsigned int lmstate)
 {
	switch (lmstate) {
	case LM_ST_UNLOCKED:
@@ -195,13 +214,26 @@ static int make_mode(const unsigned int lmstate)
	case LM_ST_SHARED:
		return DLM_LOCK_PR;
	}
-	printk(KERN_ERR "unknown LM state %d", lmstate);
+	fs_err(sdp, "unknown LM state %d\n", lmstate);
	BUG();
	return -1;
 }
 
+/* Taken from fs/dlm/lock.c. */
+
+static bool middle_conversion(int cur, int req)
+{
+	return (cur == DLM_LOCK_PR && req == DLM_LOCK_CW) ||
+	       (cur == DLM_LOCK_CW && req == DLM_LOCK_PR);
+}
+
+static bool down_conversion(int cur, int req)
+{
+	return !middle_conversion(cur, req) && req < cur;
+}
+
 static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
-		      const int req)
+		      const int req, bool blocking)
 {
	u32 lkf = 0;
 
@@ -216,23 +248,16 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
		lkf |= DLM_LKF_NOQUEUEBAST;
	}
 
-	if (gfs_flags & LM_FLAG_PRIORITY) {
-		lkf |= DLM_LKF_NOORDER;
-		lkf |= DLM_LKF_HEADQUE;
-	}
-
-	if (gfs_flags & LM_FLAG_ANY) {
-		if (req == DLM_LOCK_PR)
-			lkf |= DLM_LKF_ALTCW;
-		else if (req == DLM_LOCK_CW)
-			lkf |= DLM_LKF_ALTPR;
-		else
-			BUG();
-	}
-
-	if (gl->gl_lksb.sb_lkid != 0) {
+	if (!test_bit(GLF_INITIAL, &gl->gl_flags)) {
		lkf |= DLM_LKF_CONVERT;
-		if (test_bit(GLF_BLOCKING, &gl->gl_flags))
+
+		/*
+		 * The DLM_LKF_QUECVT flag needs to be set for "first come,
+		 * first served" semantics, but it must only be set for
+		 * "upward" lock conversions or else DLM will reject the
+		 * request as invalid.
+		 */
+		if (blocking)
			lkf |= DLM_LKF_QUECVT;
	}
 
@@ -251,74 +276,119 @@ static void gfs2_reverse_hex(char *c, u64 value)
 
 static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
		     unsigned int flags)
 {
-	struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
-	int req;
+	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
+	bool blocking;
+	int cur, req;
	u32 lkf;
	char strname[GDLM_STRNAME_BYTES] = "";
+	int error;
 
-	req = make_mode(req_state);
-	lkf = make_flags(gl, flags, req);
+	gl->gl_req = req_state;
+	cur = make_mode(gl->gl_name.ln_sbd, gl->gl_state);
+	req = make_mode(gl->gl_name.ln_sbd, req_state);
+	blocking = !down_conversion(cur, req) &&
+		   !(flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB));
+	lkf = make_flags(gl, flags, req, blocking);
+	if (blocking)
+		set_bit(GLF_BLOCKING, &gl->gl_flags);
	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
-	if (gl->gl_lksb.sb_lkid) {
-		gfs2_update_request_times(gl);
-	} else {
+	if (test_bit(GLF_INITIAL, &gl->gl_flags)) {
		memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
		strname[GDLM_STRNAME_BYTES - 1] = '\0';
		gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
		gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
		gl->gl_dstamp = ktime_get_real();
+	} else {
+		gfs2_update_request_times(gl);
	}
 
	/*
	 * Submit the actual lock request.
	 */
 
-	return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
-			GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
+again:
+	down_read(&ls->ls_sem);
+	error = -ENODEV;
+	if (likely(ls->ls_dlm != NULL)) {
+		error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
+				 GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
+	}
+	up_read(&ls->ls_sem);
+	if (error == -EBUSY) {
+		msleep(20);
+		goto again;
+	}
+	return error;
 }
 
 static void gdlm_put_lock(struct gfs2_glock *gl)
 {
-	struct gfs2_sbd *sdp = gl->gl_sbd;
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-	int lvb_needs_unlock = 0;
+	uint32_t flags = 0;
	int error;
 
-	if (gl->gl_lksb.sb_lkid == 0) {
+	BUG_ON(!__lockref_is_dead(&gl->gl_lockref));
+
+	if (test_bit(GLF_INITIAL, &gl->gl_flags)) {
		gfs2_glock_free(gl);
		return;
	}
 
-	clear_bit(GLF_BLOCKING, &gl->gl_flags);
	gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
	gfs2_update_request_times(gl);
 
-	/* don't want to skip dlm_unlock writing the lvb when lock is ex */
-
-	if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
-		lvb_needs_unlock = 1;
+	/*
+	 * When the lockspace is released, all remaining glocks will be
+	 * unlocked automatically. This is more efficient than unlocking them
+	 * individually, but when the lock is held in DLM_LOCK_EX or
+	 * DLM_LOCK_PW mode, the lock value block (LVB) would be lost.
+	 */
 
	if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
-	    !lvb_needs_unlock) {
+	    (!gl->gl_lksb.sb_lvbptr || gl->gl_state != LM_ST_EXCLUSIVE)) {
+		gfs2_glock_free_later(gl);
+		return;
+	}
+
+	if (gl->gl_lksb.sb_lvbptr)
+		flags |= DLM_LKF_VALBLK;
+
+again:
+	down_read(&ls->ls_sem);
+	error = -ENODEV;
+	if (likely(ls->ls_dlm != NULL)) {
+		error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, flags,
+				   NULL, gl);
+	}
+	up_read(&ls->ls_sem);
+	if (error == -EBUSY) {
+		msleep(20);
+		goto again;
+	}
+
+	if (error == -ENODEV) {
		gfs2_glock_free(gl);
		return;
	}
 
-	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
-			   NULL, gl);
	if (error) {
-		printk(KERN_ERR "gdlm_unlock %x,%llx err=%d\n",
+		fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n",
		       gl->gl_name.ln_type,
		       (unsigned long long)gl->gl_name.ln_number, error);
-		return;
	}
 }
 
 static void gdlm_cancel(struct gfs2_glock *gl)
 {
-	struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
-	dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
+	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
+
+	down_read(&ls->ls_sem);
+	if (likely(ls->ls_dlm != NULL)) {
+		dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
+	}
+	up_read(&ls->ls_sem);
 }
 
 /*
@@ -466,19 +536,19 @@ static void gdlm_cancel(struct gfs2_glock *gl)
 static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
			     char *lvb_bits)
 {
-	uint32_t gen;
+	__le32 gen;
	memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
-	memcpy(&gen, lvb_bits, sizeof(uint32_t));
+	memcpy(&gen, lvb_bits, sizeof(__le32));
	*lvb_gen = le32_to_cpu(gen);
 }
 
 static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
			      char *lvb_bits)
 {
-	uint32_t gen;
+	__le32 gen;
	memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
	gen = cpu_to_le32(lvb_gen);
-	memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
+	memcpy(ls->ls_control_lvb, &gen, sizeof(__le32));
 }
 
 static int all_jid_bits_clear(char *lvb)
@@ -498,7 +568,11 @@ static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int error;
 
-	error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+	down_read(&ls->ls_sem);
+	error = -ENODEV;
+	if (likely(ls->ls_dlm != NULL))
+		error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+	up_read(&ls->ls_sem);
	if (error) {
		fs_err(sdp, "%s lkid %x error %d\n",
		       name, lksb->sb_lkid, error);
@@ -525,9 +599,14 @@ static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
	memset(strname, 0, GDLM_STRNAME_BYTES);
	snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);
 
-	error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
-			 strname, GDLM_STRNAME_BYTES - 1,
-			 0, sync_wait_cb, ls, NULL);
+	down_read(&ls->ls_sem);
+	error = -ENODEV;
+	if (likely(ls->ls_dlm != NULL)) {
+		error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
+				 strname, GDLM_STRNAME_BYTES - 1,
+				 0, sync_wait_cb, ls, NULL);
+	}
+	up_read(&ls->ls_sem);
	if (error) {
		fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
		       name, lksb->sb_lkid, flags, mode, error);
@@ -820,6 +899,13 @@ restart:
		goto fail;
	}
 
+	/**
+	 * If we're a spectator, we don't want to take the lock in EX because
+	 * we cannot do the first-mount responsibility it implies: recovery.
+	 */
+	if (sdp->sd_args.ar_spectator)
+		goto locks_done;
+
	error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
	if (!error) {
		mounted_mode = DLM_LOCK_EX;
@@ -895,10 +981,18 @@ locks_done:
	if (lvb_gen < mount_gen) {
		/* wait for mounted nodes to update control_lock lvb to our
		   generation, which might include new recovery bits set */
-		fs_info(sdp, "control_mount wait1 block %u start %u mount %u "
-			"lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
-			lvb_gen, ls->ls_recover_flags);
-		spin_unlock(&ls->ls_recover_spin);
+		if (sdp->sd_args.ar_spectator) {
+			fs_info(sdp, "Recovery is required. Waiting for a "
+				"non-spectator to mount.\n");
+			spin_unlock(&ls->ls_recover_spin);
+			msleep_interruptible(1000);
+		} else {
+			fs_info(sdp, "control_mount wait1 block %u start %u "
+				"mount %u lvb %u flags %lx\n", block_gen,
+				start_gen, mount_gen, lvb_gen,
+				ls->ls_recover_flags);
+			spin_unlock(&ls->ls_recover_spin);
+		}
		goto restart;
	}
 
@@ -934,12 +1028,6 @@ fail:
	return error;
 }
 
-static int dlm_recovery_wait(void *word)
-{
-	schedule();
-	return 0;
-}
-
 static int control_first_done(struct gfs2_sbd *sdp)
 {
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -974,7 +1062,7 @@ restart:
		fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
 
		wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
-			    dlm_recovery_wait, TASK_UNINTERRUPTIBLE);
+			    TASK_UNINTERRUPTIBLE);
		goto restart;
	}
 
@@ -1000,7 +1088,7 @@ restart:
 
 /*
  * Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC)
- * to accomodate the largest slot number. (NB dlm slot numbers start at 1,
+ * to accommodate the largest slot number. (NB dlm slot numbers start at 1,
  * gfs2 jids start at 0, so jid = slot - 1)
  */
 
@@ -1028,14 +1116,14 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
	}
 
	old_size = ls->ls_recover_size;
-
-	if (old_size >= max_jid + 1)
+	new_size = old_size;
+	while (new_size < max_jid + 1)
+		new_size += RECOVER_SIZE_INC;
+	if (new_size == old_size)
		return 0;
 
-	new_size = old_size + RECOVER_SIZE_INC;
-
-	submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
-	result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
+	submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
+	result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
	if (!submit || !result) {
		kfree(submit);
		kfree(result);
@@ -1062,6 +1150,7 @@ static void free_recover_size(struct lm_lockstruct *ls)
	ls->ls_recover_submit = NULL;
	ls->ls_recover_result = NULL;
	ls->ls_recover_size = 0;
+	ls->ls_lvb_bits = NULL;
 }
 
 /* dlm calls before it does lock recovery */
@@ -1071,6 +1160,10 @@ static void gdlm_recover_prep(void *arg)
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+	if (gfs2_withdrawn(sdp)) {
+		fs_err(sdp, "recover_prep ignored due to withdraw.\n");
+		return;
+	}
	spin_lock(&ls->ls_recover_spin);
	ls->ls_recover_block = ls->ls_recover_start;
	set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
@@ -1093,16 +1186,21 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
	int jid = slot->slot - 1;
 
+	if (gfs2_withdrawn(sdp)) {
+		fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n",
+		       jid);
+		return;
+	}
	spin_lock(&ls->ls_recover_spin);
	if (ls->ls_recover_size < jid + 1) {
-		fs_err(sdp, "recover_slot jid %d gen %u short size %d",
+		fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
			jid, ls->ls_recover_block, ls->ls_recover_size);
		spin_unlock(&ls->ls_recover_spin);
		return;
	}
 
	if (ls->ls_recover_submit[jid]) {
-		fs_info(sdp, "recover_slot jid %d gen %u prev %u",
+		fs_info(sdp, "recover_slot jid %d gen %u prev %u\n",
			jid, ls->ls_recover_block, ls->ls_recover_submit[jid]);
	}
	ls->ls_recover_submit[jid] = ls->ls_recover_block;
@@ -1117,6 +1215,10 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
	struct gfs2_sbd *sdp = arg;
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+	if (gfs2_withdrawn(sdp)) {
+		fs_err(sdp, "recover_done ignored due to withdraw.\n");
+		return;
+	}
	/* ensure the ls jid arrays are large enough */
	set_recover_size(sdp, slots, num_slots);
 
@@ -1132,7 +1234,7 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
	queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
 
	clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
	wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
	spin_unlock(&ls->ls_recover_spin);
 }
@@ -1144,6 +1246,11 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
 {
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
+	if (gfs2_withdrawn(sdp)) {
+		fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n",
+		       jid);
+		return;
+	}
	if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
		return;
 
@@ -1157,7 +1264,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
		return;
	}
	if (ls->ls_recover_size < jid + 1) {
-		fs_err(sdp, "recovery_result jid %d short size %d",
+		fs_err(sdp, "recovery_result jid %d short size %d\n",
		       jid, ls->ls_recover_size);
		spin_unlock(&ls->ls_recover_spin);
		return;
@@ -1178,7 +1285,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
	spin_unlock(&ls->ls_recover_spin);
 }
 
-const struct dlm_lockspace_ops gdlm_lockspace_ops = {
+static const struct dlm_lockspace_ops gdlm_lockspace_ops = {
	.recover_prep = gdlm_recover_prep,
	.recover_slot = gdlm_recover_slot,
	.recover_done = gdlm_recover_done,
@@ -1197,6 +1304,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
	 */
 
	INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
+	ls->ls_dlm = NULL;
	spin_lock_init(&ls->ls_recover_spin);
	ls->ls_recover_flags = 0;
	ls->ls_recover_mount = 0;
@@ -1225,12 +1333,13 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
	memcpy(cluster, table, strlen(table) - strlen(fsname));
	fsname++;
 
-	flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;
+	flags = DLM_LSFL_NEWEXCL;
 
	/*
	 * create/join lockspace
	 */
 
+	init_rwsem(&ls->ls_sem);
	error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
				  &gdlm_lockspace_ops, sdp, &ops_result,
				  &ls->ls_dlm);
@@ -1269,12 +1378,12 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
	ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
	clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
-	smp_mb__after_clear_bit();
+	smp_mb__after_atomic();
	wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
	return 0;
 
 fail_release:
-	dlm_release_lockspace(ls->ls_dlm, 2);
+	dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL);
 fail_free:
	free_recover_size(ls);
 fail:
@@ -1294,7 +1403,15 @@ static void gdlm_first_done(struct gfs2_sbd *sdp)
		fs_err(sdp, "mount first_done error %d\n", error);
 }
 
-static void gdlm_unmount(struct gfs2_sbd *sdp)
+/*
+ * gdlm_unmount - release our lockspace
+ * @sdp: the superblock
+ * @clean: Indicates whether or not the remaining nodes in the cluster should
+ *         perform recovery. Recovery is necessary when a node withdraws and
+ *         its journal remains dirty. Recovery isn't necessary when a node
+ *         cleanly unmounts a filesystem.
+ */
+static void gdlm_unmount(struct gfs2_sbd *sdp, bool clean)
 {
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 
@@ -1310,10 +1427,14 @@ static void gdlm_unmount(struct gfs2_sbd *sdp)
	/* mounted_lock and control_lock will be purged in dlm recovery */
 
 release:
+	down_write(&ls->ls_sem);
	if (ls->ls_dlm) {
-		dlm_release_lockspace(ls->ls_dlm, 2);
+		dlm_release_lockspace(ls->ls_dlm,
+				      clean ? DLM_RELEASE_NORMAL :
+					      DLM_RELEASE_RECOVER);
		ls->ls_dlm = NULL;
	}
+	up_write(&ls->ls_sem);
 
	free_recover_size(ls);
 }
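
A few illustrative sketches of the mechanisms this patch touches follow. They are reviewer notes, not part of the commit.

The smoothing rule restated in the gfs2_update_stats() comment (srtt += delta/8; var = 3/4 * var + 1/4 * |delta|, in integer nanoseconds) is easy to check with a minimal userspace sketch; the names update_stats, srtt and var here are illustrative, not the kernel's:

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

static void update_stats(int64_t *srtt, int64_t *var, int64_t sample)
{
	int64_t delta = sample - *srtt;

	*srtt += delta >> 3;			/* srtt += delta / 8 */
	*var += (llabs(delta) - *var) >> 2;	/* var += (|delta| - var) / 4 */
}

int main(void)
{
	int64_t srtt = 0, var = 0;
	const int64_t samples[] = { 1000, 1200, 900, 5000, 1100 };

	for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_stats(&srtt, &var, samples[i]);
		printf("sample=%5lld srtt=%5lld var=%5lld\n",
		       (long long)samples[i], (long long)srtt,
		       (long long)var);
	}
	return 0;
}

An outlier sample (5000 above) moves the smoothed mean by only an eighth of the error but leaves a longer-lasting mark on the variance estimate, which is the behaviour the TCP/IP Illustrated reference describes.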
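
The new middle_conversion()/down_conversion() helpers encode the shape of the DLM mode lattice: numerically the modes run NL(0) < CR(1) < CW(2) < PR(3) < PW(4) < EX(5), but CW and PR are mutually incompatible rather than ordered, so a PR/CW conversion in either direction is a "middle" conversion and must not be classified as downward. This classification is what keeps DLM_LKF_QUECVT off down-conversions, as the make_flags() comment requires. A standalone sketch with illustrative enum names mirroring the DLM mode ordering:

#include <stdbool.h>
#include <stdio.h>

enum { LK_NL, LK_CR, LK_CW, LK_PR, LK_PW, LK_EX };	/* mirrors DLM mode values 0..5 */

static bool middle_conversion(int cur, int req)
{
	return (cur == LK_PR && req == LK_CW) ||
	       (cur == LK_CW && req == LK_PR);
}

static bool down_conversion(int cur, int req)
{
	return !middle_conversion(cur, req) && req < cur;
}

int main(void)
{
	printf("EX->PR down=%d\n", down_conversion(LK_EX, LK_PR)); /* 1: non-blocking, no QUECVT */
	printf("PR->CW down=%d\n", down_conversion(LK_PR, LK_CW)); /* 0: "middle", counts as blocking */
	printf("PR->EX down=%d\n", down_conversion(LK_PR, LK_EX)); /* 0: upward, QUECVT allowed */
	return 0;
}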
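
Every dlm_lock()/dlm_unlock() call site in the patch now follows the same guard pattern: take ls->ls_sem for reading, check that ls->ls_dlm still exists (gdlm_unmount() clears it under the write lock), and retry after a short sleep when DLM reports -EBUSY. Below is a userspace analogue of that pattern, with a pthread rwlock standing in for ls_sem and a stub standing in for the DLM call; all names are illustrative:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct lockspace {
	pthread_rwlock_t sem;	/* plays the role of ls->ls_sem */
	void *dlm;		/* plays the role of ls->ls_dlm */
};

/* Stub for dlm_lock()/dlm_unlock(); reports EBUSY twice, then succeeds. */
static int stub_dlm_op(void *dlm)
{
	static int busy = 2;

	return busy-- > 0 ? EBUSY : 0;
}

static int guarded_op(struct lockspace *ls)
{
	int error;

again:
	pthread_rwlock_rdlock(&ls->sem);
	error = ENODEV;			/* lockspace already released */
	if (ls->dlm != NULL)
		error = stub_dlm_op(ls->dlm);
	pthread_rwlock_unlock(&ls->sem);
	if (error == EBUSY) {
		usleep(20000);		/* back off 20ms, like msleep(20) */
		goto again;
	}
	return error;
}

int main(void)
{
	struct lockspace ls = { PTHREAD_RWLOCK_INITIALIZER, &ls };

	printf("op -> %d\n", guarded_op(&ls));	/* retries past EBUSY, prints 0 */
	return 0;
}

Taking the semaphore only for reading keeps the common path concurrent; the write side is taken once, at unmount, to tear the lockspace down.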
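
The resource names that gdlm_lock() builds are fixed-width 24-character strings: 8 hex digits of lock type followed by 16 hex digits of lock number, space-padded on the left (sync_lock() produces the same layout with snprintf("%8x%16x", ...)). gfs2_reverse_hex() fills each field backwards from its last character, which is why the diff passes strname + 7 and strname + 23. A sketch, assuming GDLM_STRNAME_BYTES is 25 (24 characters plus the terminating NUL) and using a hypothetical lock type value:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define STRNAME_BYTES 25	/* assumed value of GDLM_STRNAME_BYTES */

static void reverse_hex(char *c, uint64_t value)
{
	if (!value)
		*c-- = '0';
	while (value) {
		*c-- = "0123456789abcdef"[value & 0x0f];
		value >>= 4;
	}
}

int main(void)
{
	char strname[STRNAME_BYTES];
	uint32_t ln_type = 2;		/* hypothetical glock type */
	uint64_t ln_number = 0x12345;

	memset(strname, ' ', STRNAME_BYTES - 1);
	strname[STRNAME_BYTES - 1] = '\0';
	reverse_hex(strname + 7, ln_type);	/* 8-char type field */
	reverse_hex(strname + 23, ln_number);	/* 16-char number field */
	printf("\"%s\"\n", strname);
	return 0;
}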
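
Finally, the reworked sizing logic in set_recover_size() rounds up to the next multiple of RECOVER_SIZE_INC large enough to hold the largest jid, instead of always growing by exactly one increment, which could still be too small for a large slot number. The arithmetic in isolation (the increment value is assumed here):

#include <stdio.h>

#define RECOVER_SIZE_INC 16	/* assumed, matching the driver's constant */

static unsigned int recover_size_for(unsigned int old_size, unsigned int max_jid)
{
	unsigned int new_size = old_size;

	/* grow in RECOVER_SIZE_INC steps until jid max_jid fits */
	while (new_size < max_jid + 1)
		new_size += RECOVER_SIZE_INC;
	return new_size;
}

int main(void)
{
	printf("%u\n", recover_size_for(16, 3));	/* 16: already fits, no realloc */
	printf("%u\n", recover_size_for(16, 40));	/* 48: grows by two increments */
	return 0;
}

When new_size comes back equal to old_size, the kernel function returns early without reallocating, exactly as the new "if (new_size == old_size) return 0;" check does.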
