summaryrefslogtreecommitdiff
path: root/net/unix/af_unix.c
diff options
context:
space:
mode:
authorMauro Carvalho Chehab <mchehab+huawei@kernel.org>2021-08-05 16:28:43 +0200
committerMauro Carvalho Chehab <mchehab+huawei@kernel.org>2021-08-05 16:28:43 +0200
commit9c3a0f285248899dfa81585bc5d5bc9ebdb8fead (patch)
tree908decd51fbb0e1c774444b002e3b89dee3fae26 /net/unix/af_unix.c
parentbfee75f73c37a2f46a6326eaa06f5db701f76f01 (diff)
parentc500bee1c5b2f1d59b1081ac879d73268ab0ff17 (diff)
Merge tag 'v5.14-rc4' into media_tree
Linux 5.14-rc4 * tag 'v5.14-rc4': (948 commits) Linux 5.14-rc4 pipe: make pipe writes always wake up readers Revert "perf map: Fix dso->nsinfo refcounting" mm/memcg: fix NULL pointer dereference in memcg_slab_free_hook() slub: fix unreclaimable slab stat for bulk free mm/migrate: fix NR_ISOLATED corruption on 64-bit mm: memcontrol: fix blocking rstat function called from atomic cgroup1 thresholding code ocfs2: issue zeroout to EOF blocks ocfs2: fix zero out valid data lib/test_string.c: move string selftest in the Runtime Testing menu gve: Update MAINTAINERS list arch: Kconfig: clean up obsolete use of HAVE_IDE can: esd_usb2: fix memory leak can: ems_usb: fix memory leak can: usb_8dev: fix memory leak can: mcba_usb_start(): add missing urb->transfer_dma initialization can: hi311x: fix a signedness bug in hi3110_cmd() MAINTAINERS: add Yasushi SHOJI as reviewer for the Microchip CAN BUS Analyzer Tool driver scsi: fas216: Fix fall-through warning for Clang scsi: acornscsi: Fix fall-through warning for clang ...
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r--net/unix/af_unix.c51
1 files changed, 49 insertions, 2 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 23c92ad15c61..ba7ced947e51 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1526,6 +1526,53 @@ out:
return err;
}
+static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+
+ /*
+ * Garbage collection of unix sockets starts by selecting a set of
+ * candidate sockets which have reference only from being in flight
+ * (total_refs == inflight_refs). This condition is checked once during
+ * the candidate collection phase, and candidates are marked as such, so
+ * that non-candidates can later be ignored. While inflight_refs is
+ * protected by unix_gc_lock, total_refs (file count) is not, hence this
+ * is an instantaneous decision.
+ *
+ * Once a candidate, however, the socket must not be reinstalled into a
+ * file descriptor while the garbage collection is in progress.
+ *
+ * If the above conditions are met, then the directed graph of
+ * candidates (*) does not change while unix_gc_lock is held.
+ *
+ * Any operations that changes the file count through file descriptors
+ * (dup, close, sendmsg) does not change the graph since candidates are
+ * not installed in fds.
+ *
+ * Dequeing a candidate via recvmsg would install it into an fd, but
+ * that takes unix_gc_lock to decrement the inflight count, so it's
+ * serialized with garbage collection.
+ *
+ * MSG_PEEK is special in that it does not change the inflight count,
+ * yet does install the socket into an fd. The following lock/unlock
+ * pair is to ensure serialization with garbage collection. It must be
+ * done between incrementing the file count and installing the file into
+ * an fd.
+ *
+ * If garbage collection starts after the barrier provided by the
+ * lock/unlock, then it will see the elevated refcount and not mark this
+ * as a candidate. If a garbage collection is already in progress
+ * before the file count was incremented, then the lock/unlock pair will
+ * ensure that garbage collection is finished before progressing to
+ * installing the fd.
+ *
+ * (*) A -> B where B is on the queue of A or B is on the queue of C
+ * which is on the queue of listening socket A.
+ */
+ spin_lock(&unix_gc_lock);
+ spin_unlock(&unix_gc_lock);
+}
+
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
int err = 0;
@@ -2175,7 +2222,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
sk_peek_offset_fwd(sk, size);
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
}
err = (flags & MSG_TRUNC) ? skb->len - skip : size;
@@ -2418,7 +2465,7 @@ unlock:
/* It is questionable, see note in unix_dgram_recvmsg.
*/
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
sk_peek_offset_fwd(sk, chunk);