summaryrefslogtreecommitdiff
path: root/ipc/msg.c
diff options
context:
space:
mode:
Diffstat (limited to 'ipc/msg.c')
-rw-r--r--ipc/msg.c161
1 files changed, 120 insertions, 41 deletions
diff --git a/ipc/msg.c b/ipc/msg.c
index 0833c6405915..ee6af4fe52bf 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -39,6 +39,7 @@
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
#include <linux/rhashtable.h>
+#include <linux/percpu_counter.h>
#include <asm/current.h>
#include <linux/uaccess.h>
@@ -61,6 +62,16 @@ struct msg_queue {
struct list_head q_senders;
} __randomize_layout;
+/*
+ * MSG_BARRIER Locking:
+ *
+ * Similar to the optimization used in ipc/mqueue.c, one syscall return path
+ * does not acquire any locks when it sees that a message exists in
+ * msg_receiver.r_msg. Therefore r_msg is set using smp_store_release()
+ * and accessed using READ_ONCE()+smp_acquire__after_ctrl_dep(). In addition,
+ * wake_q_add_safe() is used. See ipc/mqueue.c for more details
+ */
+
/* one msg_receiver structure for each sleeping receiver */
struct msg_receiver {
struct list_head r_list;
@@ -120,7 +131,7 @@ static void msg_rcu_free(struct rcu_head *head)
struct msg_queue *msq = container_of(p, struct msg_queue, q_perm);
security_msg_queue_free(&msq->q_perm);
- kvfree(msq);
+ kfree(msq);
}
/**
@@ -137,7 +148,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
key_t key = params->key;
int msgflg = params->flg;
- msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
+ msq = kmalloc(sizeof(*msq), GFP_KERNEL_ACCOUNT);
if (unlikely(!msq))
return -ENOMEM;
@@ -147,7 +158,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
msq->q_perm.security = NULL;
retval = security_msg_queue_alloc(&msq->q_perm);
if (retval) {
- kvfree(msq);
+ kfree(msq);
return retval;
}
@@ -184,6 +195,10 @@ static inline void ss_add(struct msg_queue *msq,
{
mss->tsk = current;
mss->msgsz = msgsz;
+ /*
+ * No memory barrier required: we did ipc_lock_object(),
+ * and the waker obtains that lock before calling wake_q_add().
+ */
__set_current_state(TASK_INTERRUPTIBLE);
list_add_tail(&mss->list, &msq->q_senders);
}
@@ -237,8 +252,13 @@ static void expunge_all(struct msg_queue *msq, int res,
struct msg_receiver *msr, *t;
list_for_each_entry_safe(msr, t, &msq->q_receivers, r_list) {
- wake_q_add(wake_q, msr->r_tsk);
- WRITE_ONCE(msr->r_msg, ERR_PTR(res));
+ struct task_struct *r_tsk;
+
+ r_tsk = get_task_struct(msr->r_tsk);
+
+ /* see MSG_BARRIER for purpose/pairing */
+ smp_store_release(&msr->r_msg, ERR_PTR(res));
+ wake_q_add_safe(wake_q, r_tsk);
}
}
@@ -251,6 +271,8 @@ static void expunge_all(struct msg_queue *msq, int res,
* before freeque() is called. msg_ids.rwsem remains locked on exit.
*/
static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
+ __releases(RCU)
+ __releases(&msq->q_perm)
{
struct msg_msg *msg, *t;
struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
@@ -264,10 +286,10 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
rcu_read_unlock();
list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
free_msg(msg);
}
- atomic_sub(msq->q_cbytes, &ns->msg_bytes);
+ percpu_counter_sub_local(&ns->percpu_msg_bytes, msq->q_cbytes);
ipc_update_pid(&msq->q_lspid, NULL);
ipc_update_pid(&msq->q_lrpid, NULL);
ipc_rcu_putref(&msq->q_perm, msg_rcu_free);
@@ -377,7 +399,7 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
* NOTE: no locks must be held, the rwsem is taken inside this function.
*/
static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
- struct msqid64_ds *msqid64)
+ struct ipc64_perm *perm, int msg_qbytes)
{
struct kern_ipc_perm *ipcp;
struct msg_queue *msq;
@@ -387,7 +409,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
rcu_read_lock();
ipcp = ipcctl_obtain_check(ns, &msg_ids(ns), msqid, cmd,
- &msqid64->msg_perm, msqid64->msg_qbytes);
+ perm, msg_qbytes);
if (IS_ERR(ipcp)) {
err = PTR_ERR(ipcp);
goto out_unlock1;
@@ -409,18 +431,18 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
{
DEFINE_WAKE_Q(wake_q);
- if (msqid64->msg_qbytes > ns->msg_ctlmnb &&
+ if (msg_qbytes > ns->msg_ctlmnb &&
!capable(CAP_SYS_RESOURCE)) {
err = -EPERM;
goto out_unlock1;
}
ipc_lock_object(&msq->q_perm);
- err = ipc_update_perm(&msqid64->msg_perm, ipcp);
+ err = ipc_update_perm(perm, ipcp);
if (err)
goto out_unlock0;
- msq->q_qbytes = msqid64->msg_qbytes;
+ msq->q_qbytes = msg_qbytes;
msq->q_ctime = ktime_get_real_seconds();
/*
@@ -474,17 +496,22 @@ static int msgctl_info(struct ipc_namespace *ns, int msqid,
msginfo->msgssz = MSGSSZ;
msginfo->msgseg = MSGSEG;
down_read(&msg_ids(ns).rwsem);
- if (cmd == MSG_INFO) {
+ if (cmd == MSG_INFO)
msginfo->msgpool = msg_ids(ns).in_use;
- msginfo->msgmap = atomic_read(&ns->msg_hdrs);
- msginfo->msgtql = atomic_read(&ns->msg_bytes);
+ max_idx = ipc_get_maxidx(&msg_ids(ns));
+ up_read(&msg_ids(ns).rwsem);
+ if (cmd == MSG_INFO) {
+ msginfo->msgmap = min_t(int,
+ percpu_counter_sum(&ns->percpu_msg_hdrs),
+ INT_MAX);
+ msginfo->msgtql = min_t(int,
+ percpu_counter_sum(&ns->percpu_msg_bytes),
+ INT_MAX);
} else {
msginfo->msgmap = MSGMAP;
msginfo->msgpool = MSGPOOL;
msginfo->msgtql = MSGTQL;
}
- max_idx = ipc_get_maxidx(&msg_ids(ns));
- up_read(&msg_ids(ns).rwsem);
return (max_idx < 0) ? 0 : max_idx;
}
@@ -567,9 +594,8 @@ out_unlock:
return err;
}
-long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
+static long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf, int version)
{
- int version;
struct ipc_namespace *ns;
struct msqid64_ds msqid64;
int err;
@@ -577,7 +603,6 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
if (msqid < 0 || cmd < 0)
return -EINVAL;
- version = ipc_parse_version(&cmd);
ns = current->nsproxy->ipc_ns;
switch (cmd) {
@@ -603,9 +628,10 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
case IPC_SET:
if (copy_msqid_from_user(&msqid64, buf, version))
return -EFAULT;
- /* fallthru */
+ return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm,
+ msqid64.msg_qbytes);
case IPC_RMID:
- return msgctl_down(ns, msqid, cmd, &msqid64);
+ return msgctl_down(ns, msqid, cmd, NULL, 0);
default:
return -EINVAL;
}
@@ -613,9 +639,23 @@ long ksys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
{
- return ksys_msgctl(msqid, cmd, buf);
+ return ksys_msgctl(msqid, cmd, buf, IPC_64);
}
+#ifdef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
+long ksys_old_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
+{
+ int version = ipc_parse_version(&cmd);
+
+ return ksys_msgctl(msqid, cmd, buf, version);
+}
+
+SYSCALL_DEFINE3(old_msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf)
+{
+ return ksys_old_msgctl(msqid, cmd, buf);
+}
+#endif
+
#ifdef CONFIG_COMPAT
struct compat_msqid_ds {
@@ -689,12 +729,11 @@ static int copy_compat_msqid_to_user(void __user *buf, struct msqid64_ds *in,
}
}
-long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr)
+static long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr, int version)
{
struct ipc_namespace *ns;
int err;
struct msqid64_ds msqid64;
- int version = compat_ipc_parse_version(&cmd);
ns = current->nsproxy->ipc_ns;
@@ -724,9 +763,9 @@ long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr)
case IPC_SET:
if (copy_compat_msqid_from_user(&msqid64, uptr, version))
return -EFAULT;
- /* fallthru */
+ return msgctl_down(ns, msqid, cmd, &msqid64.msg_perm, msqid64.msg_qbytes);
case IPC_RMID:
- return msgctl_down(ns, msqid, cmd, &msqid64);
+ return msgctl_down(ns, msqid, cmd, NULL, 0);
default:
return -EINVAL;
}
@@ -734,9 +773,23 @@ long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr)
COMPAT_SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, void __user *, uptr)
{
- return compat_ksys_msgctl(msqid, cmd, uptr);
+ return compat_ksys_msgctl(msqid, cmd, uptr, IPC_64);
+}
+
+#ifdef CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION
+long compat_ksys_old_msgctl(int msqid, int cmd, void __user *uptr)
+{
+ int version = compat_ipc_parse_version(&cmd);
+
+ return compat_ksys_msgctl(msqid, cmd, uptr, version);
+}
+
+COMPAT_SYSCALL_DEFINE3(old_msgctl, int, msqid, int, cmd, void __user *, uptr)
+{
+ return compat_ksys_old_msgctl(msqid, cmd, uptr);
}
#endif
+#endif
static int testmsg(struct msg_msg *msg, long type, int mode)
{
@@ -773,13 +826,17 @@ static inline int pipelined_send(struct msg_queue *msq, struct msg_msg *msg,
list_del(&msr->r_list);
if (msr->r_maxsize < msg->m_ts) {
wake_q_add(wake_q, msr->r_tsk);
- WRITE_ONCE(msr->r_msg, ERR_PTR(-E2BIG));
+
+ /* See expunge_all regarding memory barrier */
+ smp_store_release(&msr->r_msg, ERR_PTR(-E2BIG));
} else {
ipc_update_pid(&msq->q_lrpid, task_pid(msr->r_tsk));
msq->q_rtime = ktime_get_real_seconds();
wake_q_add(wake_q, msr->r_tsk);
- WRITE_ONCE(msr->r_msg, msg);
+
+ /* See expunge_all regarding memory barrier */
+ smp_store_release(&msr->r_msg, msg);
return 1;
}
}
@@ -884,8 +941,8 @@ static long do_msgsnd(int msqid, long mtype, void __user *mtext,
list_add_tail(&msg->m_list, &msq->q_messages);
msq->q_cbytes += msgsz;
msq->q_qnum++;
- atomic_add(msgsz, &ns->msg_bytes);
- atomic_inc(&ns->msg_hdrs);
+ percpu_counter_add_local(&ns->percpu_msg_bytes, msgsz);
+ percpu_counter_add_local(&ns->percpu_msg_hdrs, 1);
}
err = 0;
@@ -921,7 +978,7 @@ SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
struct compat_msgbuf {
compat_long_t mtype;
- char mtext[1];
+ char mtext[];
};
long compat_ksys_msgsnd(int msqid, compat_uptr_t msgp,
@@ -1108,8 +1165,8 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msq->q_rtime = ktime_get_real_seconds();
ipc_update_pid(&msq->q_lrpid, task_tgid(current));
msq->q_cbytes -= msg->m_ts;
- atomic_sub(msg->m_ts, &ns->msg_bytes);
- atomic_dec(&ns->msg_hdrs);
+ percpu_counter_sub_local(&ns->percpu_msg_bytes, msg->m_ts);
+ percpu_counter_sub_local(&ns->percpu_msg_hdrs, 1);
ss_wakeup(msq, &wake_q, false);
goto out_unlock0;
@@ -1129,7 +1186,11 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
msr_d.r_maxsize = INT_MAX;
else
msr_d.r_maxsize = bufsz;
- msr_d.r_msg = ERR_PTR(-EAGAIN);
+
+ /* memory barrier not require due to ipc_lock_object() */
+ WRITE_ONCE(msr_d.r_msg, ERR_PTR(-EAGAIN));
+
+ /* memory barrier not required, we own ipc_lock_object() */
__set_current_state(TASK_INTERRUPTIBLE);
ipc_unlock_object(&msq->q_perm);
@@ -1158,8 +1219,12 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
* signal) it will either see the message and continue ...
*/
msg = READ_ONCE(msr_d.r_msg);
- if (msg != ERR_PTR(-EAGAIN))
+ if (msg != ERR_PTR(-EAGAIN)) {
+ /* see MSG_BARRIER for purpose/pairing */
+ smp_acquire__after_ctrl_dep();
+
goto out_unlock1;
+ }
/*
* ... or see -EAGAIN, acquire the lock to check the message
@@ -1167,7 +1232,7 @@ static long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, in
*/
ipc_lock_object(&msq->q_perm);
- msg = msr_d.r_msg;
+ msg = READ_ONCE(msr_d.r_msg);
if (msg != ERR_PTR(-EAGAIN))
goto out_unlock0;
@@ -1238,15 +1303,27 @@ COMPAT_SYSCALL_DEFINE5(msgrcv, int, msqid, compat_uptr_t, msgp,
}
#endif
-void msg_init_ns(struct ipc_namespace *ns)
+int msg_init_ns(struct ipc_namespace *ns)
{
+ int ret;
+
ns->msg_ctlmax = MSGMAX;
ns->msg_ctlmnb = MSGMNB;
ns->msg_ctlmni = MSGMNI;
- atomic_set(&ns->msg_bytes, 0);
- atomic_set(&ns->msg_hdrs, 0);
+ ret = percpu_counter_init(&ns->percpu_msg_bytes, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_bytes;
+ ret = percpu_counter_init(&ns->percpu_msg_hdrs, 0, GFP_KERNEL);
+ if (ret)
+ goto fail_msg_hdrs;
ipc_init_ids(&ns->ids[IPC_MSG_IDS]);
+ return 0;
+
+fail_msg_hdrs:
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+fail_msg_bytes:
+ return ret;
}
#ifdef CONFIG_IPC_NS
@@ -1255,6 +1332,8 @@ void msg_exit_ns(struct ipc_namespace *ns)
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ percpu_counter_destroy(&ns->percpu_msg_hdrs);
}
#endif