summaryrefslogtreecommitdiff
path: root/fs/gfs2/lock_dlm.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/gfs2/lock_dlm.c')
-rw-r--r--fs/gfs2/lock_dlm.c367
1 files changed, 244 insertions, 123 deletions
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index c8423d6de6c3..b8d249925395 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -1,59 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
* Copyright 2004-2011 Red Hat, Inc.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/fs.h>
#include <linux/dlm.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/gfs2_ondisk.h>
+#include <linux/sched/signal.h>
#include "incore.h"
-#include "glock.h"
#include "util.h"
#include "sys.h"
#include "trace_gfs2.h"
-extern struct workqueue_struct *gfs2_control_wq;
-
/**
* gfs2_update_stats - Update time based stats
- * @mv: Pointer to mean/variance structure to update
+ * @s: The stats to update (local or global)
+ * @index: The index inside @s
* @sample: New data to include
- *
- * @delta is the difference between the current rtt sample and the
- * running average srtt. We add 1/8 of that to the srtt in order to
- * update the current srtt estimate. The varience estimate is a bit
- * more complicated. We subtract the abs value of the @delta from
- * the current variance estimate and add 1/4 of that to the running
- * total.
- *
- * Note that the index points at the array entry containing the smoothed
- * mean value, and the variance is always in the following entry
- *
- * Reference: TCP/IP Illustrated, vol 2, p. 831,832
- * All times are in units of integer nanoseconds. Unlike the TCP/IP case,
- * they are not scaled fixed point.
*/
-
static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
s64 sample)
{
+ /*
+ * @delta is the difference between the current rtt sample and the
+ * running average srtt. We add 1/8 of that to the srtt in order to
+ * update the current srtt estimate. The variance estimate is a bit
+ * more complicated. We subtract the current variance estimate from
+ * the abs value of the @delta and add 1/4 of that to the running
+ * total. That's equivalent to 3/4 of the current variance
+ * estimate plus 1/4 of the abs of @delta.
+ *
+ * Note that the index points at the array entry containing the
+ * smoothed mean value, and the variance is always in the following
+ * entry
+ *
+ * Reference: TCP/IP Illustrated, vol 2, p. 831,832
+ * All times are in units of integer nanoseconds. Unlike the TCP/IP
+ * case, they are not scaled fixed point.
+ */
+
s64 delta = sample - s->stats[index];
s->stats[index] += (delta >> 3);
index++;
- s->stats[index] += ((abs64(delta) - s->stats[index]) >> 2);
+ s->stats[index] += (s64)(abs(delta) - s->stats[index]) >> 2;
}
/**
* gfs2_update_reply_times - Update locking statistics
* @gl: The glock to update
+ * @blocking: The operation may have been blocking
*
* This assumes that gl->gl_dstamp has been set earlier.
*
@@ -68,17 +70,17 @@ static inline void gfs2_update_stats(struct gfs2_lkstats *s, unsigned index,
* TRY_1CB flags are set are classified as non-blocking. All
* other DLM requests are counted as (potentially) blocking.
*/
-static inline void gfs2_update_reply_times(struct gfs2_glock *gl)
+static inline void gfs2_update_reply_times(struct gfs2_glock *gl,
+ bool blocking)
{
struct gfs2_pcpu_lkstats *lks;
const unsigned gltype = gl->gl_name.ln_type;
- unsigned index = test_bit(GLF_BLOCKING, &gl->gl_flags) ?
- GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
+ unsigned index = blocking ? GFS2_LKS_SRTTB : GFS2_LKS_SRTT;
s64 rtt;
preempt_disable();
rtt = ktime_to_ns(ktime_sub(ktime_get_real(), gl->gl_dstamp));
- lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
+ lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
gfs2_update_stats(&gl->gl_stats, index, rtt); /* Local */
gfs2_update_stats(&lks->lkstats[gltype], index, rtt); /* Global */
preempt_enable();
@@ -106,7 +108,7 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl)
dstamp = gl->gl_dstamp;
gl->gl_dstamp = ktime_get_real();
irt = ktime_to_ns(ktime_sub(gl->gl_dstamp, dstamp));
- lks = this_cpu_ptr(gl->gl_sbd->sd_lkstats);
+ lks = this_cpu_ptr(gl->gl_name.ln_sbd->sd_lkstats);
gfs2_update_stats(&gl->gl_stats, GFS2_LKS_SIRT, irt); /* Local */
gfs2_update_stats(&lks->lkstats[gltype], GFS2_LKS_SIRT, irt); /* Global */
preempt_enable();
@@ -115,9 +117,18 @@ static inline void gfs2_update_request_times(struct gfs2_glock *gl)
static void gdlm_ast(void *arg)
{
struct gfs2_glock *gl = arg;
- unsigned ret = gl->gl_state;
+ bool blocking;
+ unsigned ret;
+
+ blocking = test_bit(GLF_BLOCKING, &gl->gl_flags);
+ gfs2_update_reply_times(gl, blocking);
+ clear_bit(GLF_BLOCKING, &gl->gl_flags);
+
+ /* If the glock is dead, we only react to a dlm_unlock() reply. */
+ if (__lockref_is_dead(&gl->gl_lockref) &&
+ gl->gl_lksb.sb_status != -DLM_EUNLOCK)
+ return;
- gfs2_update_reply_times(gl);
BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
if ((gl->gl_lksb.sb_flags & DLM_SBF_VALNOTVALID) && gl->gl_lksb.sb_lvbptr)
@@ -128,13 +139,16 @@ static void gdlm_ast(void *arg)
gfs2_glock_free(gl);
return;
case -DLM_ECANCEL: /* Cancel while getting lock */
- ret |= LM_OUT_CANCELED;
+ ret = LM_OUT_CANCELED;
goto out;
case -EAGAIN: /* Try lock fails */
+ ret = LM_OUT_TRY_AGAIN;
+ goto out;
case -EDEADLK: /* Deadlock detected */
+ ret = LM_OUT_DEADLOCK;
goto out;
case -ETIMEDOUT: /* Canceled due to timeout */
- ret |= LM_OUT_ERROR;
+ ret = LM_OUT_ERROR;
goto out;
case 0: /* Success */
break;
@@ -143,20 +157,22 @@ static void gdlm_ast(void *arg)
}
ret = gl->gl_req;
- if (gl->gl_lksb.sb_flags & DLM_SBF_ALTMODE) {
- if (gl->gl_req == LM_ST_SHARED)
- ret = LM_ST_DEFERRED;
- else if (gl->gl_req == LM_ST_DEFERRED)
- ret = LM_ST_SHARED;
- else
- BUG();
- }
- set_bit(GLF_INITIAL, &gl->gl_flags);
+ /*
+ * The GLF_INITIAL flag is initially set for new glocks. Upon the
+ * first successful new (non-conversion) request, we clear this flag to
+ * indicate that a DLM lock exists and that gl->gl_lksb.sb_lkid is the
+ * identifier to use for identifying it.
+ *
+ * Any failed initial requests do not create a DLM lock, so we ignore
+ * the gl->gl_lksb.sb_lkid values that come with such requests.
+ */
+
+ clear_bit(GLF_INITIAL, &gl->gl_flags);
gfs2_glock_complete(gl, ret);
return;
out:
- if (!test_bit(GLF_INITIAL, &gl->gl_flags))
+ if (test_bit(GLF_INITIAL, &gl->gl_flags))
gl->gl_lksb.sb_lkid = 0;
gfs2_glock_complete(gl, ret);
}
@@ -165,6 +181,9 @@ static void gdlm_bast(void *arg, int mode)
{
struct gfs2_glock *gl = arg;
+ if (__lockref_is_dead(&gl->gl_lockref))
+ return;
+
switch (mode) {
case DLM_LOCK_EX:
gfs2_glock_cb(gl, LM_ST_UNLOCKED);
@@ -176,14 +195,14 @@ static void gdlm_bast(void *arg, int mode)
gfs2_glock_cb(gl, LM_ST_SHARED);
break;
default:
- printk(KERN_ERR "unknown bast mode %d", mode);
+ fs_err(gl->gl_name.ln_sbd, "unknown bast mode %d\n", mode);
BUG();
}
}
/* convert gfs lock-state to dlm lock-mode */
-static int make_mode(const unsigned int lmstate)
+static int make_mode(struct gfs2_sbd *sdp, const unsigned int lmstate)
{
switch (lmstate) {
case LM_ST_UNLOCKED:
@@ -195,13 +214,26 @@ static int make_mode(const unsigned int lmstate)
case LM_ST_SHARED:
return DLM_LOCK_PR;
}
- printk(KERN_ERR "unknown LM state %d", lmstate);
+ fs_err(sdp, "unknown LM state %d\n", lmstate);
BUG();
return -1;
}
+/* Taken from fs/dlm/lock.c. */
+
+static bool middle_conversion(int cur, int req)
+{
+ return (cur == DLM_LOCK_PR && req == DLM_LOCK_CW) ||
+ (cur == DLM_LOCK_CW && req == DLM_LOCK_PR);
+}
+
+static bool down_conversion(int cur, int req)
+{
+ return !middle_conversion(cur, req) && req < cur;
+}
+
static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
- const int req)
+ const int req, bool blocking)
{
u32 lkf = 0;
@@ -216,23 +248,16 @@ static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags,
lkf |= DLM_LKF_NOQUEUEBAST;
}
- if (gfs_flags & LM_FLAG_PRIORITY) {
- lkf |= DLM_LKF_NOORDER;
- lkf |= DLM_LKF_HEADQUE;
- }
-
- if (gfs_flags & LM_FLAG_ANY) {
- if (req == DLM_LOCK_PR)
- lkf |= DLM_LKF_ALTCW;
- else if (req == DLM_LOCK_CW)
- lkf |= DLM_LKF_ALTPR;
- else
- BUG();
- }
-
- if (gl->gl_lksb.sb_lkid != 0) {
+ if (!test_bit(GLF_INITIAL, &gl->gl_flags)) {
lkf |= DLM_LKF_CONVERT;
- if (test_bit(GLF_BLOCKING, &gl->gl_flags))
+
+ /*
+ * The DLM_LKF_QUECVT flag needs to be set for "first come,
+ * first served" semantics, but it must only be set for
+ * "upward" lock conversions or else DLM will reject the
+ * request as invalid.
+ */
+ if (blocking)
lkf |= DLM_LKF_QUECVT;
}
@@ -251,74 +276,119 @@ static void gfs2_reverse_hex(char *c, u64 value)
static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state,
unsigned int flags)
{
- struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
- int req;
+ struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
+ bool blocking;
+ int cur, req;
u32 lkf;
char strname[GDLM_STRNAME_BYTES] = "";
+ int error;
- req = make_mode(req_state);
- lkf = make_flags(gl, flags, req);
+ gl->gl_req = req_state;
+ cur = make_mode(gl->gl_name.ln_sbd, gl->gl_state);
+ req = make_mode(gl->gl_name.ln_sbd, req_state);
+ blocking = !down_conversion(cur, req) &&
+ !(flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB));
+ lkf = make_flags(gl, flags, req, blocking);
+ if (blocking)
+ set_bit(GLF_BLOCKING, &gl->gl_flags);
gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
- if (gl->gl_lksb.sb_lkid) {
- gfs2_update_request_times(gl);
- } else {
+ if (test_bit(GLF_INITIAL, &gl->gl_flags)) {
memset(strname, ' ', GDLM_STRNAME_BYTES - 1);
strname[GDLM_STRNAME_BYTES - 1] = '\0';
gfs2_reverse_hex(strname + 7, gl->gl_name.ln_type);
gfs2_reverse_hex(strname + 23, gl->gl_name.ln_number);
gl->gl_dstamp = ktime_get_real();
+ } else {
+ gfs2_update_request_times(gl);
}
/*
* Submit the actual lock request.
*/
- return dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
- GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
+again:
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ error = dlm_lock(ls->ls_dlm, req, &gl->gl_lksb, lkf, strname,
+ GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast);
+ }
+ up_read(&ls->ls_sem);
+ if (error == -EBUSY) {
+ msleep(20);
+ goto again;
+ }
+ return error;
}
static void gdlm_put_lock(struct gfs2_glock *gl)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- int lvb_needs_unlock = 0;
+ uint32_t flags = 0;
int error;
- if (gl->gl_lksb.sb_lkid == 0) {
+ BUG_ON(!__lockref_is_dead(&gl->gl_lockref));
+
+ if (test_bit(GLF_INITIAL, &gl->gl_flags)) {
gfs2_glock_free(gl);
return;
}
- clear_bit(GLF_BLOCKING, &gl->gl_flags);
gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
gfs2_update_request_times(gl);
- /* don't want to skip dlm_unlock writing the lvb when lock is ex */
-
- if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
- lvb_needs_unlock = 1;
+ /*
+ * When the lockspace is released, all remaining glocks will be
+ * unlocked automatically. This is more efficient than unlocking them
+ * individually, but when the lock is held in DLM_LOCK_EX or
+ * DLM_LOCK_PW mode, the lock value block (LVB) would be lost.
+ */
if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
- !lvb_needs_unlock) {
+ (!gl->gl_lksb.sb_lvbptr || gl->gl_state != LM_ST_EXCLUSIVE)) {
+ gfs2_glock_free_later(gl);
+ return;
+ }
+
+ if (gl->gl_lksb.sb_lvbptr)
+ flags |= DLM_LKF_VALBLK;
+
+again:
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, flags,
+ NULL, gl);
+ }
+ up_read(&ls->ls_sem);
+ if (error == -EBUSY) {
+ msleep(20);
+ goto again;
+ }
+
+ if (error == -ENODEV) {
gfs2_glock_free(gl);
return;
}
- error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK,
- NULL, gl);
if (error) {
- printk(KERN_ERR "gdlm_unlock %x,%llx err=%d\n",
+ fs_err(sdp, "gdlm_unlock %x,%llx err=%d\n",
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number, error);
- return;
}
}
static void gdlm_cancel(struct gfs2_glock *gl)
{
- struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
- dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
+ struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;
+
+ down_read(&ls->ls_sem);
+ if (likely(ls->ls_dlm != NULL)) {
+ dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_CANCEL, NULL, gl);
+ }
+ up_read(&ls->ls_sem);
}
/*
@@ -466,19 +536,19 @@ static void gdlm_cancel(struct gfs2_glock *gl)
static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen,
char *lvb_bits)
{
- uint32_t gen;
+ __le32 gen;
memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE);
- memcpy(&gen, lvb_bits, sizeof(uint32_t));
+ memcpy(&gen, lvb_bits, sizeof(__le32));
*lvb_gen = le32_to_cpu(gen);
}
static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen,
char *lvb_bits)
{
- uint32_t gen;
+ __le32 gen;
memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE);
gen = cpu_to_le32(lvb_gen);
- memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t));
+ memcpy(ls->ls_control_lvb, &gen, sizeof(__le32));
}
static int all_jid_bits_clear(char *lvb)
@@ -498,7 +568,11 @@ static int sync_unlock(struct gfs2_sbd *sdp, struct dlm_lksb *lksb, char *name)
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int error;
- error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL))
+ error = dlm_unlock(ls->ls_dlm, lksb->sb_lkid, 0, lksb, ls);
+ up_read(&ls->ls_sem);
if (error) {
fs_err(sdp, "%s lkid %x error %d\n",
name, lksb->sb_lkid, error);
@@ -525,9 +599,14 @@ static int sync_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags,
memset(strname, 0, GDLM_STRNAME_BYTES);
snprintf(strname, GDLM_STRNAME_BYTES, "%8x%16x", LM_TYPE_NONDISK, num);
- error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
- strname, GDLM_STRNAME_BYTES - 1,
- 0, sync_wait_cb, ls, NULL);
+ down_read(&ls->ls_sem);
+ error = -ENODEV;
+ if (likely(ls->ls_dlm != NULL)) {
+ error = dlm_lock(ls->ls_dlm, mode, lksb, flags,
+ strname, GDLM_STRNAME_BYTES - 1,
+ 0, sync_wait_cb, ls, NULL);
+ }
+ up_read(&ls->ls_sem);
if (error) {
fs_err(sdp, "%s lkid %x flags %x mode %d error %d\n",
name, lksb->sb_lkid, flags, mode, error);
@@ -820,6 +899,13 @@ restart:
goto fail;
}
+ /**
+ * If we're a spectator, we don't want to take the lock in EX because
+ * we cannot do the first-mount responsibility it implies: recovery.
+ */
+ if (sdp->sd_args.ar_spectator)
+ goto locks_done;
+
error = mounted_lock(sdp, DLM_LOCK_EX, DLM_LKF_CONVERT|DLM_LKF_NOQUEUE);
if (!error) {
mounted_mode = DLM_LOCK_EX;
@@ -895,10 +981,18 @@ locks_done:
if (lvb_gen < mount_gen) {
/* wait for mounted nodes to update control_lock lvb to our
generation, which might include new recovery bits set */
- fs_info(sdp, "control_mount wait1 block %u start %u mount %u "
- "lvb %u flags %lx\n", block_gen, start_gen, mount_gen,
- lvb_gen, ls->ls_recover_flags);
- spin_unlock(&ls->ls_recover_spin);
+ if (sdp->sd_args.ar_spectator) {
+ fs_info(sdp, "Recovery is required. Waiting for a "
+ "non-spectator to mount.\n");
+ spin_unlock(&ls->ls_recover_spin);
+ msleep_interruptible(1000);
+ } else {
+ fs_info(sdp, "control_mount wait1 block %u start %u "
+ "mount %u lvb %u flags %lx\n", block_gen,
+ start_gen, mount_gen, lvb_gen,
+ ls->ls_recover_flags);
+ spin_unlock(&ls->ls_recover_spin);
+ }
goto restart;
}
@@ -934,12 +1028,6 @@ fail:
return error;
}
-static int dlm_recovery_wait(void *word)
-{
- schedule();
- return 0;
-}
-
static int control_first_done(struct gfs2_sbd *sdp)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -974,7 +1062,7 @@ restart:
fs_info(sdp, "control_first_done wait gen %u\n", start_gen);
wait_on_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY,
- dlm_recovery_wait, TASK_UNINTERRUPTIBLE);
+ TASK_UNINTERRUPTIBLE);
goto restart;
}
@@ -1000,7 +1088,7 @@ restart:
/*
* Expand static jid arrays if necessary (by increments of RECOVER_SIZE_INC)
- * to accomodate the largest slot number. (NB dlm slot numbers start at 1,
+ * to accommodate the largest slot number. (NB dlm slot numbers start at 1,
* gfs2 jids start at 0, so jid = slot - 1)
*/
@@ -1028,14 +1116,14 @@ static int set_recover_size(struct gfs2_sbd *sdp, struct dlm_slot *slots,
}
old_size = ls->ls_recover_size;
-
- if (old_size >= max_jid + 1)
+ new_size = old_size;
+ while (new_size < max_jid + 1)
+ new_size += RECOVER_SIZE_INC;
+ if (new_size == old_size)
return 0;
- new_size = old_size + RECOVER_SIZE_INC;
-
- submit = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
- result = kzalloc(new_size * sizeof(uint32_t), GFP_NOFS);
+ submit = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
+ result = kcalloc(new_size, sizeof(uint32_t), GFP_NOFS);
if (!submit || !result) {
kfree(submit);
kfree(result);
@@ -1062,6 +1150,7 @@ static void free_recover_size(struct lm_lockstruct *ls)
ls->ls_recover_submit = NULL;
ls->ls_recover_result = NULL;
ls->ls_recover_size = 0;
+ ls->ls_lvb_bits = NULL;
}
/* dlm calls before it does lock recovery */
@@ -1071,6 +1160,10 @@ static void gdlm_recover_prep(void *arg)
struct gfs2_sbd *sdp = arg;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ if (gfs2_withdrawn(sdp)) {
+ fs_err(sdp, "recover_prep ignored due to withdraw.\n");
+ return;
+ }
spin_lock(&ls->ls_recover_spin);
ls->ls_recover_block = ls->ls_recover_start;
set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
@@ -1093,16 +1186,21 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int jid = slot->slot - 1;
+ if (gfs2_withdrawn(sdp)) {
+ fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n",
+ jid);
+ return;
+ }
spin_lock(&ls->ls_recover_spin);
if (ls->ls_recover_size < jid + 1) {
- fs_err(sdp, "recover_slot jid %d gen %u short size %d",
+ fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
jid, ls->ls_recover_block, ls->ls_recover_size);
spin_unlock(&ls->ls_recover_spin);
return;
}
if (ls->ls_recover_submit[jid]) {
- fs_info(sdp, "recover_slot jid %d gen %u prev %u",
+ fs_info(sdp, "recover_slot jid %d gen %u prev %u\n",
jid, ls->ls_recover_block, ls->ls_recover_submit[jid]);
}
ls->ls_recover_submit[jid] = ls->ls_recover_block;
@@ -1117,6 +1215,10 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
struct gfs2_sbd *sdp = arg;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ if (gfs2_withdrawn(sdp)) {
+ fs_err(sdp, "recover_done ignored due to withdraw.\n");
+ return;
+ }
/* ensure the ls jid arrays are large enough */
set_recover_size(sdp, slots, num_slots);
@@ -1132,7 +1234,7 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
clear_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&ls->ls_recover_flags, DFL_DLM_RECOVERY);
spin_unlock(&ls->ls_recover_spin);
}
@@ -1144,6 +1246,11 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ if (gfs2_withdrawn(sdp)) {
+ fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n",
+ jid);
+ return;
+ }
if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
return;
@@ -1157,7 +1264,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
return;
}
if (ls->ls_recover_size < jid + 1) {
- fs_err(sdp, "recovery_result jid %d short size %d",
+ fs_err(sdp, "recovery_result jid %d short size %d\n",
jid, ls->ls_recover_size);
spin_unlock(&ls->ls_recover_spin);
return;
@@ -1178,7 +1285,7 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
spin_unlock(&ls->ls_recover_spin);
}
-const struct dlm_lockspace_ops gdlm_lockspace_ops = {
+static const struct dlm_lockspace_ops gdlm_lockspace_ops = {
.recover_prep = gdlm_recover_prep,
.recover_slot = gdlm_recover_slot,
.recover_done = gdlm_recover_done,
@@ -1197,6 +1304,7 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
*/
INIT_DELAYED_WORK(&sdp->sd_control_work, gfs2_control_func);
+ ls->ls_dlm = NULL;
spin_lock_init(&ls->ls_recover_spin);
ls->ls_recover_flags = 0;
ls->ls_recover_mount = 0;
@@ -1225,12 +1333,13 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
memcpy(cluster, table, strlen(table) - strlen(fsname));
fsname++;
- flags = DLM_LSFL_FS | DLM_LSFL_NEWEXCL;
+ flags = DLM_LSFL_NEWEXCL;
/*
* create/join lockspace
*/
+ init_rwsem(&ls->ls_sem);
error = dlm_new_lockspace(fsname, cluster, flags, GDLM_LVB_SIZE,
&gdlm_lockspace_ops, sdp, &ops_result,
&ls->ls_dlm);
@@ -1269,12 +1378,12 @@ static int gdlm_mount(struct gfs2_sbd *sdp, const char *table)
ls->ls_first = !!test_bit(DFL_FIRST_MOUNT, &ls->ls_recover_flags);
clear_bit(SDF_NOJOURNALID, &sdp->sd_flags);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID);
return 0;
fail_release:
- dlm_release_lockspace(ls->ls_dlm, 2);
+ dlm_release_lockspace(ls->ls_dlm, DLM_RELEASE_NORMAL);
fail_free:
free_recover_size(ls);
fail:
@@ -1294,7 +1403,15 @@ static void gdlm_first_done(struct gfs2_sbd *sdp)
fs_err(sdp, "mount first_done error %d\n", error);
}
-static void gdlm_unmount(struct gfs2_sbd *sdp)
+/*
+ * gdlm_unmount - release our lockspace
+ * @sdp: the superblock
+ * @clean: Indicates whether or not the remaining nodes in the cluster should
+ * perform recovery. Recovery is necessary when a node withdraws and
+ * its journal remains dirty. Recovery isn't necessary when a node
+ * cleanly unmounts a filesystem.
+ */
+static void gdlm_unmount(struct gfs2_sbd *sdp, bool clean)
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -1310,10 +1427,14 @@ static void gdlm_unmount(struct gfs2_sbd *sdp)
/* mounted_lock and control_lock will be purged in dlm recovery */
release:
+ down_write(&ls->ls_sem);
if (ls->ls_dlm) {
- dlm_release_lockspace(ls->ls_dlm, 2);
+ dlm_release_lockspace(ls->ls_dlm,
+ clean ? DLM_RELEASE_NORMAL :
+ DLM_RELEASE_RECOVER);
ls->ls_dlm = NULL;
}
+ up_write(&ls->ls_sem);
free_recover_size(ls);
}