summaryrefslogtreecommitdiff
path: root/net/unix/af_unix.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r--net/unix/af_unix.c260
1 files changed, 130 insertions, 130 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 79981d97bc9c..c1f403bed683 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -114,6 +114,7 @@
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
+#include <linux/freezer.h>
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
@@ -263,9 +264,8 @@ static struct sock *__unix_find_socket_byname(struct net *net,
int len, int type, unsigned int hash)
{
struct sock *s;
- struct hlist_node *node;
- sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
+ sk_for_each(s, &unix_socket_table[hash ^ type]) {
struct unix_sock *u = unix_sk(s);
if (!net_eq(sock_net(s), net))
@@ -298,10 +298,9 @@ static inline struct sock *unix_find_socket_byname(struct net *net,
static struct sock *unix_find_socket_byinode(struct inode *i)
{
struct sock *s;
- struct hlist_node *node;
spin_lock(&unix_table_lock);
- sk_for_each(s, node,
+ sk_for_each(s,
&unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
struct dentry *dentry = unix_sk(s)->path.dentry;
@@ -384,7 +383,7 @@ static void unix_sock_destructor(struct sock *sk)
#endif
}
-static int unix_release_sock(struct sock *sk, int embrion)
+static void unix_release_sock(struct sock *sk, int embrion)
{
struct unix_sock *u = unix_sk(sk);
struct path path;
@@ -441,7 +440,7 @@ static int unix_release_sock(struct sock *sk, int embrion)
/* ---- Socket is dead now and most probably destroyed ---- */
/*
- * Fixme: BSD difference: In BSD all sockets connected to use get
+ * Fixme: BSD difference: In BSD all sockets connected to us get
* ECONNRESET and we die on the spot. In Linux we behave
* like files and pipes do and wait for the last
* dereference.
@@ -453,8 +452,6 @@ static int unix_release_sock(struct sock *sk, int embrion)
if (unix_tot_inflight)
unix_gc(); /* Garbage collect fds */
-
- return 0;
}
static void init_peercred(struct sock *sk)
@@ -481,7 +478,6 @@ static int unix_listen(struct socket *sock, int backlog)
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
struct pid *old_pid = NULL;
- const struct cred *old_cred = NULL;
err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
@@ -503,8 +499,6 @@ static int unix_listen(struct socket *sock, int backlog)
out_unlock:
unix_state_unlock(sk);
put_pid(old_pid);
- if (old_cred)
- put_cred(old_cred);
out:
return err;
}
@@ -704,9 +698,10 @@ static int unix_release(struct socket *sock)
if (!sk)
return 0;
+ unix_release_sock(sk, 0);
sock->sk = NULL;
- return unix_release_sock(sk, 0);
+ return 0;
}
static int unix_autobind(struct socket *sock)
@@ -823,6 +818,34 @@ fail:
return NULL;
}
+static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+{
+ struct dentry *dentry;
+ struct path path;
+ int err = 0;
+ /*
+ * Get the parent directory, calculate the hash for last
+ * component.
+ */
+ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ return err;
+
+ /*
+ * All right, let's create it.
+ */
+ err = security_path_mknod(&path, dentry, mode, 0);
+ if (!err) {
+ err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
+ if (!err) {
+ res->mnt = mntget(path.mnt);
+ res->dentry = dget(dentry);
+ }
+ }
+ done_path_create(&path, dentry);
+ return err;
+}
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
@@ -831,8 +854,6 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
- struct dentry *dentry = NULL;
- struct path path;
int err;
unsigned int hash;
struct unix_address *addr;
@@ -869,43 +890,23 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
atomic_set(&addr->refcnt, 1);
if (sun_path[0]) {
- umode_t mode;
- err = 0;
- /*
- * Get the parent directory, calculate the hash for last
- * component.
- */
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- goto out_mknod_parent;
-
- /*
- * All right, let's create it.
- */
- mode = S_IFSOCK |
+ struct path path;
+ umode_t mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current_umask());
- err = mnt_want_write(path.mnt);
- if (err)
- goto out_mknod_dput;
- err = security_path_mknod(&path, dentry, mode, 0);
- if (err)
- goto out_mknod_drop_write;
- err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
-out_mknod_drop_write:
- mnt_drop_write(path.mnt);
- if (err)
- goto out_mknod_dput;
- mutex_unlock(&path.dentry->d_inode->i_mutex);
- dput(path.dentry);
- path.dentry = dentry;
-
+ err = unix_mknod(sun_path, mode, &path);
+ if (err) {
+ if (err == -EEXIST)
+ err = -EADDRINUSE;
+ unix_release_addr(addr);
+ goto out_up;
+ }
addr->hash = UNIX_HASH_SIZE;
- }
-
- spin_lock(&unix_table_lock);
-
- if (!sun_path[0]) {
+ hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
+ spin_lock(&unix_table_lock);
+ u->path = path;
+ list = &unix_socket_table[hash];
+ } else {
+ spin_lock(&unix_table_lock);
err = -EADDRINUSE;
if (__unix_find_socket_byname(net, sunaddr, addr_len,
sk->sk_type, hash)) {
@@ -914,9 +915,6 @@ out_mknod_drop_write:
}
list = &unix_socket_table[addr->hash];
- } else {
- list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
- u->path = path;
}
err = 0;
@@ -930,16 +928,6 @@ out_up:
mutex_unlock(&u->readlock);
out:
return err;
-
-out_mknod_dput:
- dput(dentry);
- mutex_unlock(&path.dentry->d_inode->i_mutex);
- path_put(&path);
-out_mknod_parent:
- if (err == -EEXIST)
- err = -EADDRINUSE;
- unix_release_addr(addr);
- goto out_up;
}
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
@@ -1258,6 +1246,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
return 0;
}
+static void unix_sock_inherit_flags(const struct socket *old,
+ struct socket *new)
+{
+ if (test_bit(SOCK_PASSCRED, &old->flags))
+ set_bit(SOCK_PASSCRED, &new->flags);
+ if (test_bit(SOCK_PASSSEC, &old->flags))
+ set_bit(SOCK_PASSSEC, &new->flags);
+}
+
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
struct sock *sk = sock->sk;
@@ -1292,6 +1289,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
/* attach accepted sock to socket */
unix_state_lock(tsk);
newsock->state = SS_CONNECTED;
+ unix_sock_inherit_flags(sock, newsock);
sock_graft(tsk, newsock);
unix_state_unlock(tsk);
return 0;
@@ -1353,7 +1351,6 @@ static void unix_destruct_scm(struct sk_buff *skb)
struct scm_cookie scm;
memset(&scm, 0, sizeof(scm));
scm.pid = UNIXCB(skb).pid;
- scm.cred = UNIXCB(skb).cred;
if (UNIXCB(skb).fp)
unix_detach_fds(&scm, skb);
@@ -1404,8 +1401,8 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
int err = 0;
UNIXCB(skb).pid = get_pid(scm->pid);
- if (scm->cred)
- UNIXCB(skb).cred = get_cred(scm->cred);
+ UNIXCB(skb).uid = scm->creds.uid;
+ UNIXCB(skb).gid = scm->creds.gid;
UNIXCB(skb).fp = NULL;
if (scm->fp && send_fds)
err = unix_attach_fds(scm, skb);
@@ -1422,13 +1419,13 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
const struct sock *other)
{
- if (UNIXCB(skb).cred)
+ if (UNIXCB(skb).pid)
return;
if (test_bit(SOCK_PASSCRED, &sock->flags) ||
!other->sk_socket ||
test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
UNIXCB(skb).pid = get_pid(task_tgid(current));
- UNIXCB(skb).cred = get_current_cred();
+ current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
}
}
@@ -1457,7 +1454,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
wait_for_unix_gc();
- err = scm_send(sock, msg, siocb->scm);
+ err = scm_send(sock, msg, siocb->scm, false);
if (err < 0)
return err;
@@ -1492,7 +1489,8 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
MAX_SKB_FRAGS * PAGE_SIZE);
skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
- msg->msg_flags & MSG_DONTWAIT, &err);
+ msg->msg_flags & MSG_DONTWAIT, &err,
+ PAGE_ALLOC_COSTLY_ORDER);
if (skb == NULL)
goto out;
@@ -1609,6 +1607,10 @@ out:
return err;
}
+/* We use paged skbs for stream sockets, and limit occupancy to 32768
+ * bytes, and a minimun of a full page.
+ */
+#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct msghdr *msg, size_t len)
@@ -1622,11 +1624,12 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct scm_cookie tmp_scm;
bool fds_sent = false;
int max_level;
+ int data_len;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
wait_for_unix_gc();
- err = scm_send(sock, msg, siocb->scm);
+ err = scm_send(sock, msg, siocb->scm, false);
if (err < 0)
return err;
@@ -1648,40 +1651,22 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
goto pipe_err;
while (sent < len) {
- /*
- * Optimisation for the fact that under 0.01% of X
- * messages typically need breaking up.
- */
-
- size = len-sent;
+ size = len - sent;
/* Keep two messages in the pipe so it schedules better */
- if (size > ((sk->sk_sndbuf >> 1) - 64))
- size = (sk->sk_sndbuf >> 1) - 64;
+ size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
- if (size > SKB_MAX_ALLOC)
- size = SKB_MAX_ALLOC;
+ /* allow fallback to order-0 allocations */
+ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
- /*
- * Grab a buffer
- */
+ data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
- skb = sock_alloc_send_skb(sk, size, msg->msg_flags&MSG_DONTWAIT,
- &err);
-
- if (skb == NULL)
+ skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
+ msg->msg_flags & MSG_DONTWAIT, &err,
+ get_order(UNIX_SKB_FRAGS_SZ));
+ if (!skb)
goto out_err;
- /*
- * If you pass two values to the sock_alloc_send_skb
- * it tries to grab the large buffer with GFP_NOFS
- * (which can fail easily), and if it fails grab the
- * fallback size buffer which is under a page and will
- * succeed. [Alan]
- */
- size = min_t(int, size, skb_tailroom(skb));
-
-
/* Only send the fds in the first buffer */
err = unix_scm_to_skb(siocb->scm, skb, !fds_sent);
if (err < 0) {
@@ -1691,7 +1676,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
max_level = err + 1;
fds_sent = true;
- err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
+ skb_put(skb, size - data_len);
+ skb->data_len = data_len;
+ skb->len = size;
+ err = skb_copy_datagram_from_iovec(skb, 0, msg->msg_iov,
+ sent, size);
if (err) {
kfree_skb(skb);
goto out_err;
@@ -1832,7 +1821,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
siocb->scm = &tmp_scm;
memset(&tmp_scm, 0, sizeof(tmp_scm));
}
- scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
+ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
unix_set_secdata(siocb->scm, skb);
if (!(flags & MSG_PEEK)) {
@@ -1872,10 +1861,10 @@ out:
}
/*
- * Sleep until data has arrive. But check for races..
+ * Sleep until more data has arrived. But check for races..
*/
-
-static long unix_stream_data_wait(struct sock *sk, long timeo)
+static long unix_stream_data_wait(struct sock *sk, long timeo,
+ struct sk_buff *last)
{
DEFINE_WAIT(wait);
@@ -1884,7 +1873,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (!skb_queue_empty(&sk->sk_receive_queue) ||
+ if (skb_peek_tail(&sk->sk_receive_queue) != last ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -1893,7 +1882,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
unix_state_unlock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = freezable_schedule_timeout(timeo);
unix_state_lock(sk);
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
}
@@ -1903,7 +1892,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
return timeo;
}
-
+static unsigned int unix_skb_len(const struct sk_buff *skb)
+{
+ return skb->len - UNIXCB(skb).consumed;
+}
static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size,
@@ -1949,14 +1941,12 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
goto out;
}
- skip = sk_peek_offset(sk, flags);
-
do {
int chunk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
unix_state_lock(sk);
- skb = skb_peek(&sk->sk_receive_queue);
+ last = skb = skb_peek(&sk->sk_receive_queue);
again:
if (skb == NULL) {
unix_sk(sk)->recursion_level = 0;
@@ -1979,7 +1969,7 @@ again:
break;
mutex_unlock(&u->readlock);
- timeo = unix_stream_data_wait(sk, timeo);
+ timeo = unix_stream_data_wait(sk, timeo, last);
if (signal_pending(current)
|| mutex_lock_interruptible(&u->readlock)) {
@@ -1993,10 +1983,13 @@ again:
break;
}
- if (skip >= skb->len) {
- skip -= skb->len;
+ skip = sk_peek_offset(sk, flags);
+ while (skip >= unix_skb_len(skb)) {
+ skip -= unix_skb_len(skb);
+ last = skb;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
- goto again;
+ if (!skb)
+ goto again;
}
unix_state_unlock(sk);
@@ -2004,11 +1997,12 @@ again:
if (check_creds) {
/* Never glue messages from different writers */
if ((UNIXCB(skb).pid != siocb->scm->pid) ||
- (UNIXCB(skb).cred != siocb->scm->cred))
+ !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) ||
+ !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid))
break;
- } else {
+ } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
/* Copy credentials */
- scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
+ scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
check_creds = 1;
}
@@ -2018,8 +2012,9 @@ again:
sunaddr = NULL;
}
- chunk = min_t(unsigned int, skb->len - skip, size);
- if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) {
+ chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
+ if (skb_copy_datagram_iovec(skb, UNIXCB(skb).consumed + skip,
+ msg->msg_iov, chunk)) {
if (copied == 0)
copied = -EFAULT;
break;
@@ -2029,14 +2024,14 @@ again:
/* Mark read part of skb as used */
if (!(flags & MSG_PEEK)) {
- skb_pull(skb, chunk);
+ UNIXCB(skb).consumed += chunk;
sk_peek_offset_bwd(sk, chunk);
if (UNIXCB(skb).fp)
unix_detach_fds(siocb->scm, skb);
- if (skb->len)
+ if (unix_skb_len(skb))
break;
skb_unlink(skb, &sk->sk_receive_queue);
@@ -2067,10 +2062,14 @@ static int unix_shutdown(struct socket *sock, int mode)
struct sock *sk = sock->sk;
struct sock *other;
- mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
-
- if (!mode)
- return 0;
+ if (mode < SHUT_RD || mode > SHUT_RDWR)
+ return -EINVAL;
+ /* This maps:
+ * SHUT_RD (0) -> RCV_SHUTDOWN (1)
+ * SHUT_WR (1) -> SEND_SHUTDOWN (2)
+ * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
+ */
+ ++mode;
unix_state_lock(sk);
sk->sk_shutdown |= mode;
@@ -2116,7 +2115,7 @@ long unix_inq_len(struct sock *sk)
if (sk->sk_type == SOCK_STREAM ||
sk->sk_type == SOCK_SEQPACKET) {
skb_queue_walk(&sk->sk_receive_queue, skb)
- amount += skb->len;
+ amount += unix_skb_len(skb);
} else {
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
@@ -2205,7 +2204,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
- mask |= POLLERR;
+ mask |= POLLERR |
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
@@ -2408,7 +2409,7 @@ static int __net_init unix_net_init(struct net *net)
goto out;
#ifdef CONFIG_PROC_FS
- if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
+ if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
unix_sysctl_unregister(net);
goto out;
}
@@ -2421,7 +2422,7 @@ out:
static void __net_exit unix_net_exit(struct net *net)
{
unix_sysctl_unregister(net);
- proc_net_remove(net, "unix");
+ remove_proc_entry("unix", net->proc_net);
}
static struct pernet_operations unix_net_ops = {
@@ -2432,9 +2433,8 @@ static struct pernet_operations unix_net_ops = {
static int __init af_unix_init(void)
{
int rc = -1;
- struct sk_buff *dummy_skb;
- BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
+ BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
rc = proto_register(&unix_proto, 1);
if (rc != 0) {