From 8decab3c8dadcdf4f54ffb30df6e6f67b398b6e0 Mon Sep 17 00:00:00 2001
From: Sunil Mushran
Date: Sun, 24 Jul 2011 10:23:54 -0700
Subject: ocfs2/dlm: Clean up messages in o2dlm

o2dlm messages needed a facelift.

Signed-off-by: Sunil Mushran
---
 fs/ocfs2/dlm/dlmlock.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

(limited to 'fs/ocfs2/dlm/dlmlock.c')

diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 8d39e0fd66f7..c7f3e22bda1e 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -319,27 +319,23 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
 	tmpret = o2net_send_message(DLM_CREATE_LOCK_MSG, dlm->key, &create,
 				    sizeof(create), res->owner, &status);
 	if (tmpret >= 0) {
-		// successfully sent and received
-		ret = status;  // this is already a dlm_status
+		ret = status;
 		if (ret == DLM_REJECTED) {
-			mlog(ML_ERROR, "%s:%.*s: BUG. this is a stale lockres "
-			     "no longer owned by %u. that node is coming back "
-			     "up currently.\n", dlm->name, create.namelen,
+			mlog(ML_ERROR, "%s: res %.*s, Stale lockres no longer "
+			     "owned by node %u. That node is coming back up "
+			     "currently.\n", dlm->name, create.namelen,
 			     create.name, res->owner);
 			dlm_print_one_lock_resource(res);
 			BUG();
 		}
 	} else {
-		mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
-		     "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key,
-		     res->owner);
-		if (dlm_is_host_down(tmpret)) {
+		mlog(ML_ERROR, "%s: res %.*s, Error %d send CREATE LOCK to "
+		     "node %u\n", dlm->name, create.namelen, create.name,
+		     tmpret, res->owner);
+		if (dlm_is_host_down(tmpret))
 			ret = DLM_RECOVERING;
-			mlog(0, "node %u died so returning DLM_RECOVERING "
-			     "from lock message!\n", res->owner);
-		} else {
+		else
 			ret = dlm_err_to_dlm_status(tmpret);
-		}
 	}

 	return ret;
--
cgit

From ff0a522e7db79625aa27a433467eb94c5e255718 Mon Sep 17 00:00:00 2001
From: Sunil Mushran
Date: Sun, 24 Jul 2011 10:29:54 -0700
Subject: ocfs2/dlm: Take inflight reference count for remotely mastered
 resources too

The inflight reference count, in the lock resource, is taken to pin the
resource in memory. We take it when a new resource is created and release
it after a lock is attached to it. We do this to prevent the resource from
getting purged prematurely.

Earlier this reference count was being taken for locally mastered resources
only. This patch extends the same functionality for remotely mastered ones.
We are doing this because the same premature purging could occur for
remotely mastered resources if the remote node were to die before
completion of the create lock.

Fix for Oracle bug#12405575.

Signed-off-by: Sunil Mushran
---
 fs/ocfs2/dlm/dlmlock.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs/ocfs2/dlm/dlmlock.c')

diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index c7f3e22bda1e..3ef2c1adfb8f 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -183,10 +183,6 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
 			kick_thread = 1;
 		}
 	}
-	/* reduce the inflight count, this may result in the lockres
-	 * being purged below during calc_usage */
-	if (lock->ml.node == dlm->node_num)
-		dlm_lockres_drop_inflight_ref(dlm, res);

 	spin_unlock(&res->spinlock);
 	wake_up(&res->wq);
@@ -737,6 +733,14 @@ retry_lock:
 		}
 	}

+	/* Inflight taken in dlm_get_lock_resource() is dropped here */
+	spin_lock(&res->spinlock);
+	dlm_lockres_drop_inflight_ref(dlm, res);
+	spin_unlock(&res->spinlock);
+
+	dlm_lockres_calc_usage(dlm, res);
+	dlm_kick_thread(dlm, res);
+
 	if (status != DLM_NORMAL) {
 		lock->lksb->flags &= ~DLM_LKSB_GET_LVB;
 		if (status != DLM_NOTQUEUED)
--
cgit
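The inflight reference is a standard pin-while-in-use pattern: hold an extra
reference from the moment the lock resource is created or looked up until the
create-lock completes, so the purge path cannot free the resource mid-operation.
The user-space sketch below is illustrative only, not o2dlm code; the struct and
helper names are invented to mirror what dlm_get_lock_resource() and
dlm_lockres_drop_inflight_ref() do in the patch above.

	#include <stdio.h>
	#include <stdlib.h>

	struct resource {
		int refs;	/* resource stays in memory while refs > 0 */
	};

	/* Pin the resource when it is created or looked up. */
	static void grab_inflight_ref(struct resource *res)
	{
		res->refs++;
	}

	/* Unpin only after the lock has been attached (or the attach failed). */
	static void drop_inflight_ref(struct resource *res)
	{
		res->refs--;
	}

	/* The purge path may only free an unpinned resource. */
	static int try_purge(struct resource **res)
	{
		if ((*res)->refs > 0)
			return 0;	/* still pinned: purge skipped */
		free(*res);
		*res = NULL;
		return 1;
	}

	int main(void)
	{
		struct resource *res = calloc(1, sizeof(*res));

		if (!res)
			return 1;
		grab_inflight_ref(res);		/* create lock in flight */
		printf("purged early: %d\n", try_purge(&res));	/* 0 */
		drop_inflight_ref(res);		/* lock attached, pin released */
		printf("purged after: %d\n", try_purge(&res));	/* 1 */
		return 0;
	}

The patch simply widens the window in which the pin is held to cover remotely
mastered resources as well, so a remote node's death cannot race the purge
against an in-progress create lock.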
From a2c0cc1579176bd0808ef7deea456767dfa80217 Mon Sep 17 00:00:00 2001
From: Sunil Mushran
Date: Sun, 24 Jul 2011 10:30:54 -0700
Subject: ocfs2/dlm: dlmlock_remote() needs to account for remastery

In dlmlock_remote(), we wait for the resource to stop being active before
setting the inprogress flag. Active includes recovery, migration, etc. The
problem here is that if the resource was being recovered or migrated, the new
owner could very well be that node itself (and thus not a remote node).

This problem was observed in Oracle bug#12583620. The error messages observed
were as follows:

dlm_send_remote_lock_request:337 ERROR: Error -40 (ELOOP) when sending message 503 (key 0xd6d8c7) to node 2
dlmlock_remote:271 ERROR: dlm status = DLM_BADARGS
dlmlock:751 ERROR: dlm status = DLM_BADARGS

Signed-off-by: Sunil Mushran
---
 fs/ocfs2/dlm/dlmlock.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'fs/ocfs2/dlm/dlmlock.c')

diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 3ef2c1adfb8f..f32fcba04923 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -227,10 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
 	     lock->ml.type, res->lockname.len,
 	     res->lockname.name, flags);

+	/*
+	 * Wait if resource is getting recovered, remastered, etc.
+	 * If the resource was remastered and new owner is self, then exit.
+	 */
 	spin_lock(&res->spinlock);
-
-	/* will exit this call with spinlock held */
 	__dlm_wait_on_lockres(res);
+	if (res->owner == dlm->node_num) {
+		spin_unlock(&res->spinlock);
+		return DLM_RECOVERING;
+	}
 	res->state |= DLM_LOCK_RES_IN_PROGRESS;

 	/* add lock to local (secondary) queue */
@@ -710,18 +716,10 @@ retry_lock:

 		if (status == DLM_RECOVERING || status == DLM_MIGRATING ||
 		    status == DLM_FORWARD) {
-			mlog(0, "retrying lock with migration/"
-			     "recovery/in progress\n");
 			msleep(100);
-			/* no waiting for dlm_reco_thread */
 			if (recovery) {
 				if (status != DLM_RECOVERING)
 					goto retry_lock;
-
-				mlog(0, "%s: got RECOVERING "
-				     "for $RECOVERY lock, master "
-				     "was %u\n", dlm->name,
-				     res->owner);
 				/* wait to see the node go down, then
 				 * drop down and allow the lockres to
 				 * get cleaned up. need to remaster. */
--
cgit
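The fix comes down to re-checking ownership after any wait that can remaster
the resource: if the waiting node itself became the owner, taking the remote
path would mean sending the create-lock message to itself, which is the ELOOP
in the log above. A rough user-space sketch of that control flow follows; the
types and helpers are invented stand-ins, and the real wait is
__dlm_wait_on_lockres() under res->spinlock.

	#include <stdio.h>

	enum dlm_status { DLM_NORMAL, DLM_RECOVERING };

	struct lockres {
		int owner;	/* node currently mastering the resource */
	};

	/* Stand-in for __dlm_wait_on_lockres(): while we slept, recovery
	 * may have remastered the resource to any node, including us. */
	static void wait_on_lockres(struct lockres *res, int new_owner)
	{
		res->owner = new_owner;
	}

	static enum dlm_status lock_remote(struct lockres *res, int self)
	{
		wait_on_lockres(res, self);	/* simulate remastery to self */
		if (res->owner == self)
			return DLM_RECOVERING;	/* caller retries */
		/* ... otherwise send the create-lock message to res->owner ... */
		return DLM_NORMAL;
	}

	int main(void)
	{
		struct lockres res = { .owner = 2 };

		/* Without the re-check, node 1 would message itself. */
		printf("status = %d (1 == DLM_RECOVERING)\n",
		       lock_remote(&res, 1));
		return 0;
	}

Returning DLM_RECOVERING feeds the retry loop visible in the second hunk: on
the next pass the resource is found to be locally mastered and the local path
is taken instead.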
From fc9f899483435935c1cd7005df29681929d1c99b Mon Sep 17 00:00:00 2001
From: Julia Lawall
Date: Sat, 9 Jul 2011 18:04:39 +0200
Subject: fs/ocfs2/dlm/dlmlock.c: free kmem_cache_zalloc'd data using
 kmem_cache_free

Memory allocated using kmem_cache_zalloc should be freed using
kmem_cache_free, not kfree.

The semantic patch that fixes this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression x,e,e1,e2;
@@
x = kmem_cache_zalloc(e1,e2)
... when != x = e
?-kfree(x)
+kmem_cache_free(e1,x)
// </smpl>

Signed-off-by: Julia Lawall
Signed-off-by: Joel Becker
---
 fs/ocfs2/dlm/dlmlock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/ocfs2/dlm/dlmlock.c')

diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index f32fcba04923..975810b98492 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -438,7 +438,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
 	/* zero memory only if kernel-allocated */
 	lksb = kzalloc(sizeof(*lksb), GFP_NOFS);
 	if (!lksb) {
-		kfree(lock);
+		kmem_cache_free(dlm_lock_cache, lock);
 		return NULL;
 	}
 	kernel_allocated = 1;
--
cgit
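The rule this patch applies is symmetric allocation and free: an object carved
out of a dedicated slab cache must be returned to that same cache. A minimal
kernel-module sketch of the pairing is below; struct foo and foo_cache are
hypothetical, while kmem_cache_create(), kmem_cache_zalloc(),
kmem_cache_free(), and kmem_cache_destroy() are the real slab API.

	#include <linux/init.h>
	#include <linux/module.h>
	#include <linux/slab.h>

	struct foo {
		int val;
	};

	static struct kmem_cache *foo_cache;	/* hypothetical slab cache */

	static int __init foo_init(void)
	{
		struct foo *f;

		foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
					      0, 0, NULL);
		if (!foo_cache)
			return -ENOMEM;

		/* Zeroed object from the cache, as dlm_new_lock() does
		 * with dlm_lock_cache. */
		f = kmem_cache_zalloc(foo_cache, GFP_NOFS);
		if (f)
			/* Must pair with the owning cache; kfree(f) is
			 * exactly the mismatch the patch removes. */
			kmem_cache_free(foo_cache, f);
		return 0;
	}

	static void __exit foo_exit(void)
	{
		kmem_cache_destroy(foo_cache);
	}

	module_init(foo_init);
	module_exit(foo_exit);
	MODULE_LICENSE("GPL");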