1 files changed, 165 insertions, 238 deletions
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 1c7955f5cdaf..894d2bad6b6c 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -21,220 +21,162 @@
 #include "internal.h"
 
 /*
- * Create volume and callback interests on a server.
+ * Handle invalidation of an mmap'd file.  We invalidate all the PTEs referring
+ * to the pages in this file's pagecache, forcing the kernel to go through
+ * ->fault() or ->page_mkwrite() - at which point we can handle invalidation
+ * more fully.
  */
-static struct afs_cb_interest *afs_create_interest(struct afs_server *server,
-						   struct afs_vnode *vnode)
+void afs_invalidate_mmap_work(struct work_struct *work)
 {
-	struct afs_vol_interest *new_vi, *vi;
-	struct afs_cb_interest *new;
-	struct hlist_node **pp;
-
-	new_vi = kzalloc(sizeof(struct afs_vol_interest), GFP_KERNEL);
-	if (!new_vi)
-		return NULL;
-
-	new = kzalloc(sizeof(struct afs_cb_interest), GFP_KERNEL);
-	if (!new) {
-		kfree(new_vi);
-		return NULL;
-	}
+	struct afs_vnode *vnode = container_of(work, struct afs_vnode, cb_work);
 
-	new_vi->usage = 1;
-	new_vi->vid = vnode->volume->vid;
-	INIT_HLIST_NODE(&new_vi->srv_link);
-	INIT_HLIST_HEAD(&new_vi->cb_interests);
+	unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false);
+}
 
-	refcount_set(&new->usage, 1);
-	new->sb = vnode->vfs_inode.i_sb;
-	new->vid = vnode->volume->vid;
-	new->server = afs_get_server(server);
-	INIT_HLIST_NODE(&new->cb_vlink);
+static void afs_volume_init_callback(struct afs_volume *volume)
+{
+	struct afs_vnode *vnode;
 
-	write_lock(&server->cb_break_lock);
+	down_read(&volume->open_mmaps_lock);
 
-	for (pp = &server->cb_volumes.first; *pp; pp = &(*pp)->next) {
-		vi = hlist_entry(*pp, struct afs_vol_interest, srv_link);
-		if (vi->vid < new_vi->vid)
-			continue;
-		if (vi->vid > new_vi->vid)
-			break;
-		vi->usage++;
-		goto found_vi;
+	list_for_each_entry(vnode, &volume->open_mmaps, cb_mmap_link) {
+		if (vnode->cb_v_check != atomic_read(&volume->cb_v_break)) {
+			afs_clear_cb_promise(vnode, afs_cb_promise_clear_vol_init_cb);
+			queue_work(system_dfl_wq, &vnode->cb_work);
+		}
 	}
 
-	new_vi->srv_link.pprev = pp;
-	new_vi->srv_link.next = *pp;
-	if (*pp)
-		(*pp)->pprev = &new_vi->srv_link.next;
-	*pp = &new_vi->srv_link;
-	vi = new_vi;
-	new_vi = NULL;
-found_vi:
-
-	new->vol_interest = vi;
-	hlist_add_head(&new->cb_vlink, &vi->cb_interests);
-
-	write_unlock(&server->cb_break_lock);
-	kfree(new_vi);
-	return new;
+	up_read(&volume->open_mmaps_lock);
 }
 
 /*
- * Set up an interest-in-callbacks record for a volume on a server and
- * register it with the server.
- * - Called with vnode->io_lock held.
+ * Allow the fileserver to request callback state (re-)initialisation.
+ * Unfortunately, UUIDs are not guaranteed unique.
  */
-int afs_register_server_cb_interest(struct afs_vnode *vnode,
-				    struct afs_server_list *slist,
-				    unsigned int index)
+void afs_init_callback_state(struct afs_server *server)
 {
-	struct afs_server_entry *entry = &slist->servers[index];
-	struct afs_cb_interest *cbi, *vcbi, *new, *old;
-	struct afs_server *server = entry->server;
-
-again:
-	if (vnode->cb_interest &&
-	    likely(vnode->cb_interest == entry->cb_interest))
-		return 0;
-
-	read_lock(&slist->lock);
-	cbi = afs_get_cb_interest(entry->cb_interest);
-	read_unlock(&slist->lock);
-
-	vcbi = vnode->cb_interest;
-	if (vcbi) {
-		if (vcbi == cbi) {
-			afs_put_cb_interest(afs_v2net(vnode), cbi);
-			return 0;
-		}
+	struct afs_server_entry *se;
 
-		/* Use a new interest in the server list for the same server
-		 * rather than an old one that's still attached to a vnode.
-		 */
-		if (cbi && vcbi->server == cbi->server) {
-			write_seqlock(&vnode->cb_lock);
-			old = vnode->cb_interest;
-			vnode->cb_interest = cbi;
-			write_sequnlock(&vnode->cb_lock);
-			afs_put_cb_interest(afs_v2net(vnode), old);
-			return 0;
-		}
+	down_read(&server->cell->vs_lock);
 
-		/* Re-use the one attached to the vnode. */
-		if (!cbi && vcbi->server == server) {
-			write_lock(&slist->lock);
-			if (entry->cb_interest) {
-				write_unlock(&slist->lock);
-				afs_put_cb_interest(afs_v2net(vnode), cbi);
-				goto again;
-			}
-
-			entry->cb_interest = cbi;
-			write_unlock(&slist->lock);
-			return 0;
-		}
+	list_for_each_entry(se, &server->volumes, slink) {
+		se->cb_expires_at = AFS_NO_CB_PROMISE;
+		se->volume->cb_expires_at = AFS_NO_CB_PROMISE;
+		trace_afs_cb_v_break(se->volume->vid, atomic_read(&se->volume->cb_v_break),
+				     afs_cb_break_for_s_reinit);
+		if (!list_empty(&se->volume->open_mmaps))
+			afs_volume_init_callback(se->volume);
 	}
 
-	if (!cbi) {
-		new = afs_create_interest(server, vnode);
-		if (!new)
-			return -ENOMEM;
-
-		write_lock(&slist->lock);
-		if (!entry->cb_interest) {
-			entry->cb_interest = afs_get_cb_interest(new);
-			cbi = new;
-			new = NULL;
-		} else {
-			cbi = afs_get_cb_interest(entry->cb_interest);
-		}
-		write_unlock(&slist->lock);
-		afs_put_cb_interest(afs_v2net(vnode), new);
-	}
+	up_read(&server->cell->vs_lock);
+}
 
-	ASSERT(cbi);
+/*
+ * actually break a callback
+ */
+void __afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
+{
+	_enter("");
 
-	/* Change the server the vnode is using.  This entails scrubbing any
-	 * interest the vnode had in the previous server it was using.
-	 */
-	write_seqlock(&vnode->cb_lock);
+	clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
+	if (afs_clear_cb_promise(vnode, afs_cb_promise_clear_cb_break)) {
+		vnode->cb_break++;
+		vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
+		afs_clear_permits(vnode);
+
+		if (vnode->lock_state == AFS_VNODE_LOCK_WAITING_FOR_CB)
+			afs_lock_may_be_available(vnode);
 
-	old = vnode->cb_interest;
-	vnode->cb_interest = cbi;
-	vnode->cb_s_break = cbi->server->cb_s_break;
-	vnode->cb_v_break = vnode->volume->cb_v_break;
-	clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
+		if (reason != afs_cb_break_for_deleted &&
+		    vnode->status.type == AFS_FTYPE_FILE &&
+		    atomic_read(&vnode->cb_nr_mmap))
+			queue_work(system_dfl_wq, &vnode->cb_work);
 
+		trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, true);
+	} else {
+		trace_afs_cb_break(&vnode->fid, vnode->cb_break, reason, false);
+	}
+}
+
+void afs_break_callback(struct afs_vnode *vnode, enum afs_cb_break_reason reason)
+{
+	write_seqlock(&vnode->cb_lock);
+	__afs_break_callback(vnode, reason);
 	write_sequnlock(&vnode->cb_lock);
-	afs_put_cb_interest(afs_v2net(vnode), old);
-	return 0;
 }
 
 /*
- * Remove an interest on a server.
+ * Look up a volume by volume ID under RCU conditions.
  */
-void afs_put_cb_interest(struct afs_net *net, struct afs_cb_interest *cbi)
+static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell,
+						afs_volid_t vid)
 {
-	struct afs_vol_interest *vi;
+	struct afs_volume *volume = NULL;
+	struct rb_node *p;
+	int seq = 1;
+
+	for (;;) {
+		/* Unfortunately, rbtree walking doesn't give reliable results
+		 * under just the RCU read lock, so we have to check for
+		 * changes.
+		 */
+		seq++; /* 2 on the 1st/lockless path, otherwise odd */
+		read_seqbegin_or_lock(&cell->volume_lock, &seq);
 
-	if (cbi && refcount_dec_and_test(&cbi->usage)) {
-		if (!hlist_unhashed(&cbi->cb_vlink)) {
-			write_lock(&cbi->server->cb_break_lock);
+		p = rcu_dereference_raw(cell->volumes.rb_node);
+		while (p) {
+			volume = rb_entry(p, struct afs_volume, cell_node);
 
-			hlist_del_init(&cbi->cb_vlink);
-			vi = cbi->vol_interest;
-			cbi->vol_interest = NULL;
-			if (--vi->usage == 0)
-				hlist_del(&vi->srv_link);
+			if (volume->vid < vid)
+				p = rcu_dereference_raw(p->rb_left);
+			else if (volume->vid > vid)
+				p = rcu_dereference_raw(p->rb_right);
 			else
-				vi = NULL;
-
-			write_unlock(&cbi->server->cb_break_lock);
-			kfree(vi);
-			afs_put_server(net, cbi->server);
+				break;
+			volume = NULL;
 		}
-		kfree(cbi);
+
+		if (volume && afs_try_get_volume(volume, afs_volume_trace_get_callback))
+			break;
+		if (!need_seqretry(&cell->volume_lock, seq))
+			break;
+		seq |= 1; /* Want a lock next time */
 	}
-}
 
-/*
- * allow the fileserver to request callback state (re-)initialisation
- */
-void afs_init_callback_state(struct afs_server *server)
-{
-	if (!test_and_clear_bit(AFS_SERVER_FL_NEW, &server->flags))
-		server->cb_s_break++;
+	done_seqretry(&cell->volume_lock, seq);
+	return volume;
 }
 
 /*
- * actually break a callback
+ * Allow the fileserver to break callbacks at the volume-level.  This is
+ * typically done when, for example, a R/W volume is snapshotted to a R/O
+ * volume (the only way to change an R/O volume).  It may also, however, happen
+ * when a volserver takes control of a volume (offlining it, moving it, etc.).
+ *
+ * Every file in that volume will need to be reevaluated.
  */
-void __afs_break_callback(struct afs_vnode *vnode)
+static void afs_break_volume_callback(struct afs_server *server,
+				      struct afs_volume *volume)
+	__releases(RCU)
 {
-	_enter("");
+	struct afs_server_list *slist = rcu_dereference(volume->servers);
+	unsigned int i, cb_v_break;
 
-	clear_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
-	if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
-		vnode->cb_break++;
-		afs_clear_permits(vnode);
+	write_lock(&volume->cb_v_break_lock);
 
-		spin_lock(&vnode->lock);
+	for (i = 0; i < slist->nr_servers; i++)
+		if (slist->servers[i].server == server)
+			slist->servers[i].cb_expires_at = AFS_NO_CB_PROMISE;
+	volume->cb_expires_at = AFS_NO_CB_PROMISE;
 
-		_debug("break callback");
+	cb_v_break = atomic_inc_return_release(&volume->cb_v_break);
+	trace_afs_cb_v_break(volume->vid, cb_v_break, afs_cb_break_for_volume_callback);
 
-		if (list_empty(&vnode->granted_locks) &&
-		    !list_empty(&vnode->pending_locks))
-			afs_lock_may_be_available(vnode);
-		spin_unlock(&vnode->lock);
-	}
-}
+	write_unlock(&volume->cb_v_break_lock);
+	rcu_read_unlock();
 
-void afs_break_callback(struct afs_vnode *vnode)
-{
-	write_seqlock(&vnode->cb_lock);
-	__afs_break_callback(vnode);
-	write_sequnlock(&vnode->cb_lock);
+	if (!list_empty(&volume->open_mmaps))
+		afs_volume_init_callback(volume);
 }
 
 /*
@@ -244,59 +186,68 @@ void afs_break_callback(struct afs_vnode *vnode)
  *   - a lock is released
  */
 static void afs_break_one_callback(struct afs_server *server,
+				   struct afs_volume *volume,
 				   struct afs_fid *fid)
 {
-	struct afs_vol_interest *vi;
-	struct afs_cb_interest *cbi;
-	struct afs_iget_data data;
+	struct super_block *sb;
 	struct afs_vnode *vnode;
 	struct inode *inode;
 
-	read_lock(&server->cb_break_lock);
-	hlist_for_each_entry(vi, &server->cb_volumes, srv_link) {
-		if (vi->vid < fid->vid)
-			continue;
-		if (vi->vid > fid->vid) {
-			vi = NULL;
-			break;
-		}
-		//atomic_inc(&vi->usage);
-		break;
+	/* See if we can find a matching inode - even an I_NEW inode needs to
+	 * be marked as it can have its callback broken before we finish
+	 * setting up the local inode.
+	 */
+	sb = rcu_dereference(volume->sb);
+	if (!sb)
+		return;
+
+	inode = find_inode_rcu(sb, fid->vnode, afs_ilookup5_test_by_fid, fid);
+	if (inode) {
+		vnode = AFS_FS_I(inode);
+		afs_break_callback(vnode, afs_cb_break_for_callback);
+	} else {
+		trace_afs_cb_miss(fid, afs_cb_break_for_callback);
 	}
+}
 
-	/* TODO: Find all matching volumes if we couldn't match the server and
-	 * break them anyway.
-	 */
-	if (!vi)
-		goto out;
+static void afs_break_some_callbacks(struct afs_server *server,
+				     struct afs_callback_break *cbb,
+				     size_t *_count)
+{
+	struct afs_callback_break *residue = cbb;
+	struct afs_volume *volume;
+	afs_volid_t vid = cbb->fid.vid;
+	size_t i;
+
+	rcu_read_lock();
+	volume = afs_lookup_volume_rcu(server->cell, vid);
+	if (cbb->fid.vnode == 0 && cbb->fid.unique == 0) {
+		afs_break_volume_callback(server, volume);
+		*_count -= 1;
+		if (*_count)
+			memmove(cbb, cbb + 1, sizeof(*cbb) * *_count);
+	} else {
+		/* TODO: Find all matching volumes if we couldn't match the server and
+		 * break them anyway.
+		 */
 
-	/* Step through all interested superblocks.  There may be more than one
-	 * because of cell aliasing.
-	 */
-	hlist_for_each_entry(cbi, &vi->cb_interests, cb_vlink) {
-		if (fid->vnode == 0 && fid->unique == 0) {
-			/* The callback break applies to an entire volume. */
-			struct afs_super_info *as = AFS_FS_S(cbi->sb);
-			struct afs_volume *volume = as->volume;
-
-			write_lock(&volume->cb_break_lock);
-			volume->cb_v_break++;
-			write_unlock(&volume->cb_break_lock);
-		} else {
-			data.volume = NULL;
-			data.fid = *fid;
-			inode = ilookup5_nowait(cbi->sb, fid->vnode,
-						afs_iget5_test, &data);
-			if (inode) {
-				vnode = AFS_FS_I(inode);
-				afs_break_callback(vnode);
-				iput(inode);
+		for (i = *_count; i > 0; cbb++, i--) {
+			if (cbb->fid.vid == vid) {
+				_debug("- Fid { vl=%08llx n=%llu u=%u }",
+				       cbb->fid.vid,
+				       cbb->fid.vnode,
+				       cbb->fid.unique);
+				--*_count;
+				if (volume)
+					afs_break_one_callback(server, volume, &cbb->fid);
+			} else {
+				*residue++ = *cbb;
 			}
 		}
+		rcu_read_unlock();
 	}
 
-out:
-	read_unlock(&server->cb_break_lock);
+	afs_put_volume(volume, afs_volume_trace_put_callback);
 }
 
 /*
@@ -308,31 +259,7 @@ void afs_break_callbacks(struct afs_server *server, size_t count,
 	_enter("%p,%zu,", server, count);
 
 	ASSERT(server != NULL);
-	ASSERTCMP(count, <=, AFSCBMAX);
-
-	/* TODO: Sort the callback break list by volume ID */
 
-	for (; count > 0; callbacks++, count--) {
-		_debug("- Fid { vl=%08llx n=%llu u=%u }",
-		       callbacks->fid.vid,
-		       callbacks->fid.vnode,
-		       callbacks->fid.unique);
-		afs_break_one_callback(server, &callbacks->fid);
-	}
-
-	_leave("");
-	return;
-}
-
-/*
- * Clear the callback interests in a server list.
- */
-void afs_clear_callback_interests(struct afs_net *net, struct afs_server_list *slist)
-{
-	int i;
-
-	for (i = 0; i < slist->nr_servers; i++) {
-		afs_put_cb_interest(net, slist->servers[i].cb_interest);
-		slist->servers[i].cb_interest = NULL;
-	}
+	while (count > 0)
+		afs_break_some_callbacks(server, callbacks, &count);
 }