1 files changed, 351 insertions, 346 deletions
diff --git a/fs/pnode.c b/fs/pnode.c
index ffd429b760d5..81f7599bdac4 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -21,17 +21,12 @@ static inline struct mount *next_peer(struct mount *p)
 
 static inline struct mount *first_slave(struct mount *p)
 {
-	return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
-}
-
-static inline struct mount *last_slave(struct mount *p)
-{
-	return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
+	return hlist_entry(p->mnt_slave_list.first, struct mount, mnt_slave);
 }
 
 static inline struct mount *next_slave(struct mount *p)
 {
-	return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
+	return hlist_entry(p->mnt_slave.next, struct mount, mnt_slave);
 }
 
 static struct mount *get_peer_under_root(struct mount *mnt,
@@ -70,69 +65,90 @@ int get_dominating_id(struct mount *mnt, const struct path *root)
 	return 0;
 }
 
-static int do_make_slave(struct mount *mnt)
+static inline bool will_be_unmounted(struct mount *m)
 {
-	struct mount *master, *slave_mnt;
+	return m->mnt.mnt_flags & MNT_UMOUNT;
+}
 
-	if (list_empty(&mnt->mnt_share)) {
-		if (IS_MNT_SHARED(mnt)) {
-			mnt_release_group_id(mnt);
-			CLEAR_MNT_SHARED(mnt);
-		}
-		master = mnt->mnt_master;
-		if (!master) {
-			struct list_head *p = &mnt->mnt_slave_list;
-			while (!list_empty(p)) {
-				slave_mnt = list_first_entry(p,
-						struct mount, mnt_slave);
-				list_del_init(&slave_mnt->mnt_slave);
-				slave_mnt->mnt_master = NULL;
-			}
-			return 0;
-		}
-	} else {
+static struct mount *propagation_source(struct mount *mnt)
+{
+	do {
 		struct mount *m;
-		/*
-		 * slave 'mnt' to a peer mount that has the
-		 * same root dentry. If none is available then
-		 * slave it to anything that is available.
-		 */
-		for (m = master = next_peer(mnt); m != mnt; m = next_peer(m)) {
-			if (m->mnt.mnt_root == mnt->mnt.mnt_root) {
-				master = m;
-				break;
-			}
+		for (m = next_peer(mnt); m != mnt; m = next_peer(m)) {
+			if (!will_be_unmounted(m))
+				return m;
 		}
-		list_del_init(&mnt->mnt_share);
-		mnt->mnt_group_id = 0;
-		CLEAR_MNT_SHARED(mnt);
+		mnt = mnt->mnt_master;
+	} while (mnt && will_be_unmounted(mnt));
+	return mnt;
+}
+
+static void transfer_propagation(struct mount *mnt, struct mount *to)
+{
+	struct hlist_node *p = NULL, *n;
+	struct mount *m;
+
+	hlist_for_each_entry_safe(m, n, &mnt->mnt_slave_list, mnt_slave) {
+		m->mnt_master = to;
+		if (!to)
+			hlist_del_init(&m->mnt_slave);
+		else
+			p = &m->mnt_slave;
 	}
-	list_for_each_entry(slave_mnt, &mnt->mnt_slave_list, mnt_slave)
-		slave_mnt->mnt_master = master;
-	list_move(&mnt->mnt_slave, &master->mnt_slave_list);
-	list_splice(&mnt->mnt_slave_list, master->mnt_slave_list.prev);
-	INIT_LIST_HEAD(&mnt->mnt_slave_list);
-	mnt->mnt_master = master;
-	return 0;
+	if (p)
+		hlist_splice_init(&mnt->mnt_slave_list, p, &to->mnt_slave_list);
 }
 
 /*
- * vfsmount lock must be held for write
+ * EXCL[namespace_sem]
  */
 void change_mnt_propagation(struct mount *mnt, int type)
 {
+	struct mount *m = mnt->mnt_master;
+
 	if (type == MS_SHARED) {
 		set_mnt_shared(mnt);
 		return;
 	}
-	do_make_slave(mnt);
-	if (type != MS_SLAVE) {
-		list_del_init(&mnt->mnt_slave);
+	if (IS_MNT_SHARED(mnt)) {
+		m = propagation_source(mnt);
+		if (list_empty(&mnt->mnt_share)) {
+			mnt_release_group_id(mnt);
+		} else {
+			list_del_init(&mnt->mnt_share);
+			mnt->mnt_group_id = 0;
+		}
+		CLEAR_MNT_SHARED(mnt);
+		transfer_propagation(mnt, m);
+	}
+	hlist_del_init(&mnt->mnt_slave);
+	if (type == MS_SLAVE) {
+		mnt->mnt_master = m;
+		if (m)
+			hlist_add_head(&mnt->mnt_slave, &m->mnt_slave_list);
+	} else {
 		mnt->mnt_master = NULL;
 		if (type == MS_UNBINDABLE)
-			mnt->mnt.mnt_flags |= MNT_UNBINDABLE;
+			mnt->mnt_t_flags |= T_UNBINDABLE;
 		else
-			mnt->mnt.mnt_flags &= ~MNT_UNBINDABLE;
+			mnt->mnt_t_flags &= ~T_UNBINDABLE;
+	}
+}
+
+static struct mount *__propagation_next(struct mount *m,
+					 struct mount *origin)
+{
+	while (1) {
+		struct mount *master = m->mnt_master;
+
+		if (master == origin->mnt_master) {
+			struct mount *next = next_peer(m);
+			return (next == origin) ? NULL : next;
+		} else if (m->mnt_slave.next)
+			return next_slave(m);
+
+		/* back at master */
+		m = master;
 	}
 }
 
@@ -150,34 +166,24 @@ static struct mount *propagation_next(struct mount *m,
 					 struct mount *origin)
 {
 	/* are there any slaves of this mount? */
-	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
+	if (!IS_MNT_NEW(m) && !hlist_empty(&m->mnt_slave_list))
 		return first_slave(m);
 
-	while (1) {
-		struct mount *master = m->mnt_master;
-
-		if (master == origin->mnt_master) {
-			struct mount *next = next_peer(m);
-			return (next == origin) ? NULL : next;
-		} else if (m->mnt_slave.next != &master->mnt_slave_list)
-			return next_slave(m);
-
-		/* back at master */
-		m = master;
-	}
+	return __propagation_next(m, origin);
 }
 
 static struct mount *skip_propagation_subtree(struct mount *m,
 						struct mount *origin)
 {
 	/*
-	 * Advance m such that propagation_next will not return
-	 * the slaves of m.
+	 * Advance m past everything that gets propagation from it.
 	 */
-	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
-		m = last_slave(m);
+	struct mount *p = __propagation_next(m, origin);
+
+	while (p && peers(m, p))
+		p = __propagation_next(p, origin);
 
-	return m;
+	return p;
 }
 
 static struct mount *next_group(struct mount *m, struct mount *origin)
@@ -185,7 +191,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
 	while (1) {
 		while (1) {
 			struct mount *next;
-			if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
+			if (!IS_MNT_NEW(m) && !hlist_empty(&m->mnt_slave_list))
 				return first_slave(m);
 			next = next_peer(m);
 			if (m->mnt_group_id == origin->mnt_group_id) {
@@ -198,7 +204,7 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
 		/* m is the last peer */
 		while (1) {
 			struct mount *master = m->mnt_master;
-			if (m->mnt_slave.next != &master->mnt_slave_list)
+			if (m->mnt_slave.next)
 				return next_slave(m);
 			m = next_peer(master);
 			if (master->mnt_group_id == origin->mnt_group_id)
@@ -212,142 +218,113 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
 	}
 }
 
-/* all accesses are serialized by namespace_sem */
-static struct mount *last_dest, *first_source, *last_source, *dest_master;
-static struct hlist_head *list;
-
-static inline bool peers(const struct mount *m1, const struct mount *m2)
-{
-	return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
-}
-
-static int propagate_one(struct mount *m, struct mountpoint *dest_mp)
+static bool need_secondary(struct mount *m, struct mountpoint *dest_mp)
 {
-	struct mount *child;
-	int type;
 	/* skip ones added by this propagate_mnt() */
 	if (IS_MNT_NEW(m))
-		return 0;
+		return false;
 	/* skip if mountpoint isn't visible in m */
 	if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
-		return 0;
+		return false;
 	/* skip if m is in the anon_ns */
 	if (is_anon_ns(m->mnt_ns))
-		return 0;
+		return false;
+	return true;
+}
 
-	if (peers(m, last_dest)) {
-		type = CL_MAKE_SHARED;
-	} else {
-		struct mount *n, *p;
-		bool done;
-		for (n = m; ; n = p) {
-			p = n->mnt_master;
-			if (p == dest_master || IS_MNT_MARKED(p))
-				break;
+static struct mount *find_master(struct mount *m,
+				struct mount *last_copy,
+				struct mount *original)
+{
+	struct mount *p;
+
+	// ascend until there's a copy for something with the same master
+	for (;;) {
+		p = m->mnt_master;
+		if (!p || IS_MNT_MARKED(p))
+			break;
+		m = p;
+	}
+	while (!peers(last_copy, original)) {
+		struct mount *parent = last_copy->mnt_parent;
+		if (parent->mnt_master == p) {
+			if (!peers(parent, m))
+				last_copy = last_copy->mnt_master;
+			break;
 		}
-		do {
-			struct mount *parent = last_source->mnt_parent;
-			if (peers(last_source, first_source))
-				break;
-			done = parent->mnt_master == p;
-			if (done && peers(n, parent))
-				break;
-			last_source = last_source->mnt_master;
-		} while (!done);
-
-		type = CL_SLAVE;
-		/* beginning of peer group among the slaves? */
-		if (IS_MNT_SHARED(m))
-			type |= CL_MAKE_SHARED;
+		last_copy = last_copy->mnt_master;
 	}
-		
-	child = copy_tree(last_source, last_source->mnt.mnt_root, type);
-	if (IS_ERR(child))
-		return PTR_ERR(child);
-	read_seqlock_excl(&mount_lock);
-	mnt_set_mountpoint(m, dest_mp, child);
-	if (m->mnt_master != dest_master)
-		SET_MNT_MARK(m->mnt_master);
-	read_sequnlock_excl(&mount_lock);
-	last_dest = m;
-	last_source = child;
-	hlist_add_head(&child->mnt_hash, list);
-	return count_mounts(m->mnt_ns, child);
+	return last_copy;
 }
 
-/*
- * mount 'source_mnt' under the destination 'dest_mnt' at
- * dentry 'dest_dentry'. And propagate that mount to
- * all the peer and slave mounts of 'dest_mnt'.
- * Link all the new mounts into a propagation tree headed at
- * source_mnt. Also link all the new mounts using ->mnt_list
- * headed at source_mnt's ->mnt_list
+/**
+ * propagate_mnt() - create secondary copies for tree attachment
+ * @dest_mnt:    destination mount.
+ * @dest_mp:     destination mountpoint.
+ * @source_mnt:  source mount.
+ * @tree_list:   list of secondaries to be attached.
  *
- * @dest_mnt: destination mount.
- * @dest_dentry: destination dentry.
- * @source_mnt: source mount.
- * @tree_list : list of heads of trees to be attached.
+ * Create secondary copies for attaching a tree with root @source_mnt
+ * at mount @dest_mnt with mountpoint @dest_mp.  Link all new mounts
+ * into a propagation graph.  Set mountpoints for all secondaries,
+ * link their roots into @tree_list via ->mnt_hash.
  */
 int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
-		    struct mount *source_mnt, struct hlist_head *tree_list)
+		  struct mount *source_mnt, struct hlist_head *tree_list)
 {
-	struct mount *m, *n;
-	int ret = 0;
-
-	/*
-	 * we don't want to bother passing tons of arguments to
-	 * propagate_one(); everything is serialized by namespace_sem,
-	 * so globals will do just fine.
-	 */
-	last_dest = dest_mnt;
-	first_source = source_mnt;
-	last_source = source_mnt;
-	list = tree_list;
-	dest_master = dest_mnt->mnt_master;
-
-	/* all peers of dest_mnt, except dest_mnt itself */
-	for (n = next_peer(dest_mnt); n != dest_mnt; n = next_peer(n)) {
-		ret = propagate_one(n, dest_mp);
-		if (ret)
-			goto out;
-	}
-
-	/* all slave groups */
-	for (m = next_group(dest_mnt, dest_mnt); m;
-			m = next_group(m, dest_mnt)) {
-		/* everything in that slave group */
-		n = m;
+	struct mount *m, *n, *copy, *this;
+	int err = 0, type;
+
+	if (dest_mnt->mnt_master)
+		SET_MNT_MARK(dest_mnt->mnt_master);
+
+	/* iterate over peer groups, depth first */
+	for (m = dest_mnt; m && !err; m = next_group(m, dest_mnt)) {
+		if (m == dest_mnt) { // have one for dest_mnt itself
+			copy = source_mnt;
+			type = CL_MAKE_SHARED;
+			n = next_peer(m);
+			if (n == m)
+				continue;
+		} else {
+			type = CL_SLAVE;
+			/* beginning of peer group among the slaves? */
+			if (IS_MNT_SHARED(m))
+				type |= CL_MAKE_SHARED;
+			n = m;
+		}
 		do {
-			ret = propagate_one(n, dest_mp);
-			if (ret)
-				goto out;
-			n = next_peer(n);
-		} while (n != m);
+			if (!need_secondary(n, dest_mp))
+				continue;
+			if (type & CL_SLAVE) // first in this peer group
+				copy = find_master(n, copy, source_mnt);
+			this = copy_tree(copy, copy->mnt.mnt_root, type);
+			if (IS_ERR(this)) {
+				err = PTR_ERR(this);
+				break;
+			}
+			read_seqlock_excl(&mount_lock);
+			mnt_set_mountpoint(n, dest_mp, this);
+			read_sequnlock_excl(&mount_lock);
+			if (n->mnt_master)
+				SET_MNT_MARK(n->mnt_master);
+			copy = this;
+			hlist_add_head(&this->mnt_hash, tree_list);
+			err = count_mounts(n->mnt_ns, this);
+			if (err)
+				break;
+			type = CL_MAKE_SHARED;
+		} while ((n = next_peer(n)) != m);
 	}
-out:
-	read_seqlock_excl(&mount_lock);
+
 	hlist_for_each_entry(n, tree_list, mnt_hash) {
 		m = n->mnt_parent;
-		if (m->mnt_master != dest_mnt->mnt_master)
+		if (m->mnt_master)
 			CLEAR_MNT_MARK(m->mnt_master);
 	}
-	read_sequnlock_excl(&mount_lock);
-	return ret;
-}
-
-static struct mount *find_topper(struct mount *mnt)
-{
-	/* If there is exactly one mount covering mnt completely return it. */
-	struct mount *child;
-
-	if (!list_is_singular(&mnt->mnt_mounts))
-		return NULL;
-
-	child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
-	if (child->mnt_mountpoint != mnt->mnt.mnt_root)
-		return NULL;
-
-	return child;
+	if (dest_mnt->mnt_master)
+		CLEAR_MNT_MARK(dest_mnt->mnt_master);
+	return err;
 }
 
 /*
@@ -407,12 +384,8 @@ bool propagation_would_overmount(const struct mount *from,
  */
 int propagate_mount_busy(struct mount *mnt, int refcnt)
 {
-	struct mount *m, *child, *topper;
 	struct mount *parent = mnt->mnt_parent;
 
-	if (mnt == parent)
-		return do_refcount_check(mnt, refcnt);
-
 	/*
 	 * quickly check if the current mount can be unmounted.
 	 * If not, we don't have to go checking for all other
@@ -421,23 +394,27 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
 	if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
 		return 1;
 
-	for (m = propagation_next(parent, parent); m;
+	if (mnt == parent)
+		return 0;
+
+	for (struct mount *m = propagation_next(parent, parent); m;
 	     		m = propagation_next(m, parent)) {
-		int count = 1;
-		child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
-		if (!child)
-			continue;
+		struct list_head *head;
+		struct mount *child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
 
-		/* Is there exactly one mount on the child that covers
-		 * it completely whose reference should be ignored?
-		 */
-		topper = find_topper(child);
-		if (topper)
-			count += 1;
-		else if (!list_empty(&child->mnt_mounts))
+		if (!child)
 			continue;
 
-		if (do_refcount_check(child, count))
+		head = &child->mnt_mounts;
+		if (!list_empty(head)) {
+			/*
+			 * a mount that covers child completely wouldn't prevent
+			 * it being pulled out; any other would.
+			 */
+			if (!list_is_singular(head) || !child->overmount)
+				continue;
+		}
+		if (do_refcount_check(child, 1))
 			return 1;
 	}
 	return 0;
@@ -463,181 +440,209 @@ void propagate_mount_unlock(struct mount *mnt)
 	}
 }
 
-static void umount_one(struct mount *mnt, struct list_head *to_umount)
+static inline bool is_candidate(struct mount *m)
 {
-	CLEAR_MNT_MARK(mnt);
-	mnt->mnt.mnt_flags |= MNT_UMOUNT;
-	list_del_init(&mnt->mnt_child);
-	list_del_init(&mnt->mnt_umounting);
-	move_from_ns(mnt, to_umount);
+	return m->mnt_t_flags & T_UMOUNT_CANDIDATE;
 }
 
-/*
- * NOTE: unmounting 'mnt' naturally propagates to all other mounts its
- * parent propagates to.
- */
-static bool __propagate_umount(struct mount *mnt,
-			       struct list_head *to_umount,
-			       struct list_head *to_restore)
+static void umount_one(struct mount *m, struct list_head *to_umount)
 {
-	bool progress = false;
-	struct mount *child;
-
-	/*
-	 * The state of the parent won't change if this mount is
-	 * already unmounted or marked as without children.
-	 */
-	if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
-		goto out;
+	m->mnt.mnt_flags |= MNT_UMOUNT;
+	list_del_init(&m->mnt_child);
+	move_from_ns(m);
+	list_add_tail(&m->mnt_list, to_umount);
+}
 
-	/* Verify topper is the only grandchild that has not been
-	 * speculatively unmounted.
-	 */
-	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
-		if (child->mnt_mountpoint == mnt->mnt.mnt_root)
-			continue;
-		if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
-			continue;
-		/* Found a mounted child */
-		goto children;
-	}
+static void remove_from_candidate_list(struct mount *m)
+{
+	m->mnt_t_flags &= ~(T_MARKED | T_UMOUNT_CANDIDATE);
+	list_del_init(&m->mnt_list);
+}
 
-	/* Mark mounts that can be unmounted if not locked */
-	SET_MNT_MARK(mnt);
-	progress = true;
+static void gather_candidates(struct list_head *set,
+			      struct list_head *candidates)
+{
+	struct mount *m, *p, *q;
 
-	/* If a mount is without children and not locked umount it. */
-	if (!IS_MNT_LOCKED(mnt)) {
-		umount_one(mnt, to_umount);
-	} else {
-children:
-		list_move_tail(&mnt->mnt_umounting, to_restore);
+	list_for_each_entry(m, set, mnt_list) {
+		if (is_candidate(m))
+			continue;
+		m->mnt_t_flags |= T_UMOUNT_CANDIDATE;
+		p = m->mnt_parent;
+		q = propagation_next(p, p);
+		while (q) {
+			struct mount *child = __lookup_mnt(&q->mnt,
+							   m->mnt_mountpoint);
+			if (child) {
+				/*
+				 * We might've already run into this one.  That
+				 * must've happened on earlier iteration of the
+				 * outer loop; in that case we can skip those
+				 * parents that get propagation from q - there
+				 * will be nothing new on those as well.
+				 */
+				if (is_candidate(child)) {
+					q = skip_propagation_subtree(q, p);
+					continue;
+				}
+				child->mnt_t_flags |= T_UMOUNT_CANDIDATE;
+				if (!will_be_unmounted(child))
+					list_add(&child->mnt_list, candidates);
+			}
+			q = propagation_next(q, p);
+		}
 	}
-out:
-	return progress;
+	list_for_each_entry(m, set, mnt_list)
+		m->mnt_t_flags &= ~T_UMOUNT_CANDIDATE;
 }
 
-static void umount_list(struct list_head *to_umount,
-			struct list_head *to_restore)
+/*
+ * We know that some child of @m can't be unmounted.  In all places where the
+ * chain of descent of @m has child not overmounting the root of parent,
+ * the parent can't be unmounted either.
+ */
+static void trim_ancestors(struct mount *m)
 {
-	struct mount *mnt, *child, *tmp;
-	list_for_each_entry(mnt, to_umount, mnt_list) {
-		list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
-			/* topper? */
-			if (child->mnt_mountpoint == mnt->mnt.mnt_root)
-				list_move_tail(&child->mnt_umounting, to_restore);
-			else
-				umount_one(child, to_umount);
-		}
+	struct mount *p;
+
+	for (p = m->mnt_parent; is_candidate(p); m = p, p = p->mnt_parent) {
+		if (IS_MNT_MARKED(m))	// all candidates beneath are overmounts
+			return;
+		SET_MNT_MARK(m);
+		if (m != p->overmount)
+			p->mnt_t_flags &= ~T_UMOUNT_CANDIDATE;
 	}
 }
 
-static void restore_mounts(struct list_head *to_restore)
+/*
+ * Find and exclude all umount candidates forbidden by @m
+ * (see Documentation/filesystems/propagate_umount.txt)
+ * If we can immediately tell that @m is OK to unmount (unlocked
+ * and all children are already committed to unmounting) commit
+ * to unmounting it.
+ * Only @m itself might be taken from the candidates list;
+ * anything found by trim_ancestors() is marked non-candidate
+ * and left on the list.
+ */
+static void trim_one(struct mount *m, struct list_head *to_umount)
 {
-	/* Restore mounts to a clean working state */
-	while (!list_empty(to_restore)) {
-		struct mount *mnt, *parent;
-		struct mountpoint *mp;
-
-		mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
-		CLEAR_MNT_MARK(mnt);
-		list_del_init(&mnt->mnt_umounting);
-
-		/* Should this mount be reparented? */
-		mp = mnt->mnt_mp;
-		parent = mnt->mnt_parent;
-		while (parent->mnt.mnt_flags & MNT_UMOUNT) {
-			mp = parent->mnt_mp;
-			parent = parent->mnt_parent;
-		}
-		if (parent != mnt->mnt_parent) {
-			mnt_change_mountpoint(parent, mp, mnt);
-			mnt_notify_add(mnt);
+	bool remove_this = false, found = false, umount_this = false;
+	struct mount *n;
+
+	if (!is_candidate(m)) { // trim_ancestors() left it on list
+		remove_from_candidate_list(m);
+		return;
+	}
+
+	list_for_each_entry(n, &m->mnt_mounts, mnt_child) {
+		if (!is_candidate(n)) {
+			found = true;
+			if (n != m->overmount) {
+				remove_this = true;
+				break;
+			}
 		}
 	}
+	if (found) {
+		trim_ancestors(m);
+	} else if (!IS_MNT_LOCKED(m) && list_empty(&m->mnt_mounts)) {
+		remove_this = true;
+		umount_this = true;
+	}
+	if (remove_this) {
+		remove_from_candidate_list(m);
+		if (umount_this)
+			umount_one(m, to_umount);
+	}
 }
 
-static void cleanup_umount_visitations(struct list_head *visited)
+static void handle_locked(struct mount *m, struct list_head *to_umount)
 {
-	while (!list_empty(visited)) {
-		struct mount *mnt =
-			list_first_entry(visited, struct mount, mnt_umounting);
-		list_del_init(&mnt->mnt_umounting);
+	struct mount *cutoff = m, *p;
+
+	if (!is_candidate(m)) { // trim_ancestors() left it on list
+		remove_from_candidate_list(m);
+		return;
+	}
+	for (p = m; is_candidate(p); p = p->mnt_parent) {
+		remove_from_candidate_list(p);
+		if (!IS_MNT_LOCKED(p))
+			cutoff = p->mnt_parent;
+	}
+	if (will_be_unmounted(p))
+		cutoff = p;
+	while (m != cutoff) {
+		umount_one(m, to_umount);
+		m = m->mnt_parent;
 	}
 }
 
 /*
- * collect all mounts that receive propagation from the mount in @list,
- * and return these additional mounts in the same list.
- * @list: the list of mounts to be unmounted.
+ * @m is not to going away, and it overmounts the top of a stack of mounts
+ * that are going away.  We know that all of those are fully overmounted
+ * by the one above (@m being the topmost of the chain), so @m can be slid
+ * in place where the bottom of the stack is attached.
  *
- * vfsmount lock must be held for write
+ * NOTE: here we temporarily violate a constraint - two mounts end up with
+ * the same parent and mountpoint; that will be remedied as soon as we
+ * return from propagate_umount() - its caller (umount_tree()) will detach
+ * the stack from the parent it (and now @m) is attached to.  umount_tree()
+ * might choose to keep unmounted pieces stuck to each other, but it always
+ * detaches them from the mounts that remain in the tree.
  */
-int propagate_umount(struct list_head *list)
+static void reparent(struct mount *m)
 {
-	struct mount *mnt;
-	LIST_HEAD(to_restore);
-	LIST_HEAD(to_umount);
-	LIST_HEAD(visited);
-
-	/* Find candidates for unmounting */
-	list_for_each_entry_reverse(mnt, list, mnt_list) {
-		struct mount *parent = mnt->mnt_parent;
-		struct mount *m;
+	struct mount *p = m;
+	struct mountpoint *mp;
 
-		/*
-		 * If this mount has already been visited it is known that it's
-		 * entire peer group and all of their slaves in the propagation
-		 * tree for the mountpoint has already been visited and there is
-		 * no need to visit them again.
-		 */
-		if (!list_empty(&mnt->mnt_umounting))
-			continue;
+	do {
+		mp = p->mnt_mp;
+		p = p->mnt_parent;
+	} while (will_be_unmounted(p));
 
-		list_add_tail(&mnt->mnt_umounting, &visited);
-		for (m = propagation_next(parent, parent); m;
-		     m = propagation_next(m, parent)) {
-			struct mount *child = __lookup_mnt(&m->mnt,
-							   mnt->mnt_mountpoint);
-			if (!child)
-				continue;
+	mnt_change_mountpoint(p, mp, m);
+	mnt_notify_add(m);
+}
 
-			if (!list_empty(&child->mnt_umounting)) {
-				/*
-				 * If the child has already been visited it is
-				 * know that it's entire peer group and all of
-				 * their slaves in the propgation tree for the
-				 * mountpoint has already been visited and there
-				 * is no need to visit this subtree again.
-				 */
-				m = skip_propagation_subtree(m, parent);
-				continue;
-			} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
-				/*
-				 * We have come across a partially unmounted
-				 * mount in a list that has not been visited
-				 * yet. Remember it has been visited and
-				 * continue about our merry way.
-				 */
-				list_add_tail(&child->mnt_umounting, &visited);
-				continue;
-			}
+/**
+ * propagate_umount - apply propagation rules to the set of mounts for umount()
+ * @set: the list of mounts to be unmounted.
+ *
+ * Collect all mounts that receive propagation from the mount in @set and have
+ * no obstacles to being unmounted.  Add these additional mounts to the set.
+ *
+ * See Documentation/filesystems/propagate_umount.txt if you do anything in
+ * this area.
+ *
+ * Locks held:
+ * mount_lock (write_seqlock), namespace_sem (exclusive).
+ */
+void propagate_umount(struct list_head *set)
+{
+	struct mount *m, *p;
+	LIST_HEAD(to_umount);	// committed to unmounting
+	LIST_HEAD(candidates);	// undecided umount candidates
 
-			/* Check the child and parents while progress is made */
-			while (__propagate_umount(child,
-						  &to_umount, &to_restore)) {
-				/* Is the parent a umount candidate? */
-				child = child->mnt_parent;
-				if (list_empty(&child->mnt_umounting))
-					break;
-			}
-		}
+	// collect all candidates
+	gather_candidates(set, &candidates);
+
+	// reduce the set until it's non-shifting
+	list_for_each_entry_safe(m, p, &candidates, mnt_list)
+		trim_one(m, &to_umount);
+
+	// ... and non-revealing
+	while (!list_empty(&candidates)) {
+		m = list_first_entry(&candidates,struct mount, mnt_list);
+		handle_locked(m, &to_umount);
 	}
 
-	umount_list(&to_umount, &to_restore);
-	restore_mounts(&to_restore);
-	cleanup_umount_visitations(&visited);
-	list_splice_tail(&to_umount, list);
+	// now to_umount consists of all acceptable candidates
+	// deal with reparenting of remaining overmounts on those
+	list_for_each_entry(m, &to_umount, mnt_list) {
+		if (m->overmount)
+			reparent(m->overmount);
+	}
 
-	return 0;
+	// and fold them into the set
+	list_splice_tail_init(&to_umount, set);
 }