summaryrefslogtreecommitdiff
path: root/ipc
diff options
context:
space:
mode:
Diffstat (limited to 'ipc')
-rw-r--r--ipc/ipc_sysctl.c14
-rw-r--r--ipc/mqueue.c82
-rw-r--r--ipc/msgutil.c6
-rw-r--r--ipc/util.c49
-rw-r--r--ipc/util.h47
5 files changed, 141 insertions, 57 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index 49f9bf4ffc7f..bfaae457810c 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -120,7 +120,9 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
static int zero;
static int one = 1;
static int int_max = INT_MAX;
-static int ipc_mni = IPCMNI;
+int ipc_mni = IPCMNI;
+int ipc_mni_shift = IPCMNI_SHIFT;
+int ipc_min_cycle = RADIX_TREE_MAP_SIZE;
static struct ctl_table ipc_kern_table[] = {
{
@@ -246,3 +248,13 @@ static int __init ipc_sysctl_init(void)
}
device_initcall(ipc_sysctl_init);
+
+static int __init ipc_mni_extend(char *str)
+{
+ ipc_mni = IPCMNI_EXTEND;
+ ipc_mni_shift = IPCMNI_EXTEND_SHIFT;
+ ipc_min_cycle = IPCMNI_EXTEND_MIN_CYCLE;
+ pr_info("IPCMNI extended to %d.\n", ipc_mni);
+ return 0;
+}
+early_param("ipcmni_extend", ipc_mni_extend);
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index aea30530c472..216cad1ff0d0 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -76,6 +76,7 @@ struct mqueue_inode_info {
wait_queue_head_t wait_q;
struct rb_root msg_tree;
+ struct rb_node *msg_tree_rightmost;
struct posix_msg_tree_node *node_cache;
struct mq_attr attr;
@@ -131,6 +132,7 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
{
struct rb_node **p, *parent = NULL;
struct posix_msg_tree_node *leaf;
+ bool rightmost = true;
p = &info->msg_tree.rb_node;
while (*p) {
@@ -139,9 +141,10 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
if (likely(leaf->priority == msg->m_type))
goto insert_msg;
- else if (msg->m_type < leaf->priority)
+ else if (msg->m_type < leaf->priority) {
p = &(*p)->rb_left;
- else
+ rightmost = false;
+ } else
p = &(*p)->rb_right;
}
if (info->node_cache) {
@@ -154,6 +157,10 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
INIT_LIST_HEAD(&leaf->msg_list);
}
leaf->priority = msg->m_type;
+
+ if (rightmost)
+ info->msg_tree_rightmost = &leaf->rb_node;
+
rb_link_node(&leaf->rb_node, parent, p);
rb_insert_color(&leaf->rb_node, &info->msg_tree);
insert_msg:
@@ -163,23 +170,35 @@ insert_msg:
return 0;
}
+static inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
+ struct mqueue_inode_info *info)
+{
+ struct rb_node *node = &leaf->rb_node;
+
+ if (info->msg_tree_rightmost == node)
+ info->msg_tree_rightmost = rb_prev(node);
+
+ rb_erase(node, &info->msg_tree);
+ if (info->node_cache) {
+ kfree(leaf);
+ } else {
+ info->node_cache = leaf;
+ }
+}
+
static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
{
- struct rb_node **p, *parent = NULL;
+ struct rb_node *parent = NULL;
struct posix_msg_tree_node *leaf;
struct msg_msg *msg;
try_again:
- p = &info->msg_tree.rb_node;
- while (*p) {
- parent = *p;
- /*
- * During insert, low priorities go to the left and high to the
- * right. On receive, we want the highest priorities first, so
- * walk all the way to the right.
- */
- p = &(*p)->rb_right;
- }
+ /*
+ * During insert, low priorities go to the left and high to the
+ * right. On receive, we want the highest priorities first, so
+ * walk all the way to the right.
+ */
+ parent = info->msg_tree_rightmost;
if (!parent) {
if (info->attr.mq_curmsgs) {
pr_warn_once("Inconsistency in POSIX message queue, "
@@ -194,24 +213,14 @@ try_again:
pr_warn_once("Inconsistency in POSIX message queue, "
"empty leaf node but we haven't implemented "
"lazy leaf delete!\n");
- rb_erase(&leaf->rb_node, &info->msg_tree);
- if (info->node_cache) {
- kfree(leaf);
- } else {
- info->node_cache = leaf;
- }
+ msg_tree_erase(leaf, info);
goto try_again;
} else {
msg = list_first_entry(&leaf->msg_list,
struct msg_msg, m_list);
list_del(&msg->m_list);
if (list_empty(&leaf->msg_list)) {
- rb_erase(&leaf->rb_node, &info->msg_tree);
- if (info->node_cache) {
- kfree(leaf);
- } else {
- info->node_cache = leaf;
- }
+ msg_tree_erase(leaf, info);
}
}
info->attr.mq_curmsgs--;
@@ -254,6 +263,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
info->qsize = 0;
info->user = NULL; /* set when all is ok */
info->msg_tree = RB_ROOT;
+ info->msg_tree_rightmost = NULL;
info->node_cache = NULL;
memset(&info->attr, 0, sizeof(info->attr));
info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
@@ -419,24 +429,19 @@ static struct inode *mqueue_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void mqueue_i_callback(struct rcu_head *head)
+static void mqueue_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
}
-static void mqueue_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, mqueue_i_callback);
-}
-
static void mqueue_evict_inode(struct inode *inode)
{
struct mqueue_inode_info *info;
struct user_struct *user;
unsigned long mq_bytes, mq_treesize;
struct ipc_namespace *ipc_ns;
- struct msg_msg *msg;
+ struct msg_msg *msg, *nmsg;
+ LIST_HEAD(tmp_msg);
clear_inode(inode);
@@ -447,10 +452,15 @@ static void mqueue_evict_inode(struct inode *inode)
info = MQUEUE_I(inode);
spin_lock(&info->lock);
while ((msg = msg_get(info)) != NULL)
- free_msg(msg);
+ list_add_tail(&msg->m_list, &tmp_msg);
kfree(info->node_cache);
spin_unlock(&info->lock);
+ list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) {
+ list_del(&msg->m_list);
+ free_msg(msg);
+ }
+
/* Total amount of bytes accounted for the mqueue */
mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
@@ -611,8 +621,6 @@ static void wq_add(struct mqueue_inode_info *info, int sr,
{
struct ext_wait_queue *walk;
- ewp->task = current;
-
list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
if (walk->task->prio <= current->prio) {
list_add_tail(&ewp->list, &walk->list);
@@ -1562,7 +1570,7 @@ static const struct file_operations mqueue_file_operations = {
static const struct super_operations mqueue_super_ops = {
.alloc_inode = mqueue_alloc_inode,
- .destroy_inode = mqueue_destroy_inode,
+ .free_inode = mqueue_free_inode,
.evict_inode = mqueue_evict_inode,
.statfs = simple_statfs,
};
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 84598025a6ad..e65593742e2b 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -18,6 +18,7 @@
#include <linux/utsname.h>
#include <linux/proc_ns.h>
#include <linux/uaccess.h>
+#include <linux/sched.h>
#include "util.h"
@@ -64,6 +65,9 @@ static struct msg_msg *alloc_msg(size_t len)
pseg = &msg->next;
while (len > 0) {
struct msg_msgseg *seg;
+
+ cond_resched();
+
alen = min(len, DATALEN_SEG);
seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT);
if (seg == NULL)
@@ -176,6 +180,8 @@ void free_msg(struct msg_msg *msg)
kfree(msg);
while (seg != NULL) {
struct msg_msgseg *tmp = seg->next;
+
+ cond_resched();
kfree(seg);
seg = tmp;
}
diff --git a/ipc/util.c b/ipc/util.c
index 0af05752969f..d126d156efc6 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -101,7 +101,6 @@ static const struct rhashtable_params ipc_kht_params = {
.head_offset = offsetof(struct kern_ipc_perm, khtnode),
.key_offset = offsetof(struct kern_ipc_perm, key),
.key_len = FIELD_SIZEOF(struct kern_ipc_perm, key),
- .locks_mul = 1,
.automatic_shrinking = true,
};
@@ -110,7 +109,7 @@ static const struct rhashtable_params ipc_kht_params = {
* @ids: ipc identifier set
*
* Set up the sequence range to use for the ipc identifier range (limited
- * below IPCMNI) then initialise the keys hashtable and ids idr.
+ * below ipc_mni) then initialise the keys hashtable and ids idr.
*/
void ipc_init_ids(struct ipc_ids *ids)
{
@@ -120,6 +119,7 @@ void ipc_init_ids(struct ipc_ids *ids)
rhashtable_init(&ids->key_ht, &ipc_kht_params);
idr_init(&ids->ipcs_idr);
ids->max_idx = -1;
+ ids->last_idx = -1;
#ifdef CONFIG_CHECKPOINT_RESTORE
ids->next_id = -1;
#endif
@@ -193,6 +193,10 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
*
* The caller must own kern_ipc_perm.lock.of the new object.
* On error, the function returns a (negative) error code.
+ *
+ * To conserve sequence number space, especially with extended ipc_mni,
+ * the sequence number is incremented only when the returned ID is less than
+ * the last one.
*/
static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
{
@@ -216,17 +220,42 @@ static inline int ipc_idr_alloc(struct ipc_ids *ids, struct kern_ipc_perm *new)
*/
if (next_id < 0) { /* !CHECKPOINT_RESTORE or next_id is unset */
- new->seq = ids->seq++;
- if (ids->seq > IPCID_SEQ_MAX)
- ids->seq = 0;
- idx = idr_alloc(&ids->ipcs_idr, new, 0, 0, GFP_NOWAIT);
+ int max_idx;
+
+ max_idx = max(ids->in_use*3/2, ipc_min_cycle);
+ max_idx = min(max_idx, ipc_mni);
+
+ /* allocate the idx, with a NULL struct kern_ipc_perm */
+ idx = idr_alloc_cyclic(&ids->ipcs_idr, NULL, 0, max_idx,
+ GFP_NOWAIT);
+
+ if (idx >= 0) {
+ /*
+ * idx got allocated successfully.
+ * Now calculate the sequence number and set the
+ * pointer for real.
+ */
+ if (idx <= ids->last_idx) {
+ ids->seq++;
+ if (ids->seq >= ipcid_seq_max())
+ ids->seq = 0;
+ }
+ ids->last_idx = idx;
+
+ new->seq = ids->seq;
+ /* no need for smp_wmb(), this is done
+ * inside idr_replace, as part of
+ * rcu_assign_pointer
+ */
+ idr_replace(&ids->ipcs_idr, new, idx);
+ }
} else {
new->seq = ipcid_to_seqx(next_id);
idx = idr_alloc(&ids->ipcs_idr, new, ipcid_to_idx(next_id),
0, GFP_NOWAIT);
}
if (idx >= 0)
- new->id = SEQ_MULTIPLIER * new->seq + idx;
+ new->id = (new->seq << ipcmni_seq_shift()) + idx;
return idx;
}
@@ -254,8 +283,8 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int limit)
/* 1) Initialize the refcount so that ipc_rcu_putref works */
refcount_set(&new->refcount, 1);
- if (limit > IPCMNI)
- limit = IPCMNI;
+ if (limit > ipc_mni)
+ limit = ipc_mni;
if (ids->in_use >= limit)
return -ENOSPC;
@@ -738,7 +767,7 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
if (total >= ids->in_use)
return NULL;
- for (; pos < IPCMNI; pos++) {
+ for (; pos < ipc_mni; pos++) {
ipc = idr_find(&ids->ipcs_idr, pos);
if (ipc != NULL) {
*new_pos = pos + 1;
diff --git a/ipc/util.h b/ipc/util.h
index e272be622ae7..0fcf8e719b76 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -15,8 +15,37 @@
#include <linux/err.h>
#include <linux/ipc_namespace.h>
-#define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */
-#define SEQ_MULTIPLIER (IPCMNI)
+/*
+ * The IPC ID contains 2 separate numbers - index and sequence number.
+ * By default,
+ * bits 0-14: index (32k, 15 bits)
+ * bits 15-30: sequence number (64k, 16 bits)
+ *
+ * When IPCMNI extension mode is turned on, the composition changes:
+ * bits 0-23: index (16M, 24 bits)
+ * bits 24-30: sequence number (128, 7 bits)
+ */
+#define IPCMNI_SHIFT 15
+#define IPCMNI_EXTEND_SHIFT 24
+#define IPCMNI_EXTEND_MIN_CYCLE (RADIX_TREE_MAP_SIZE * RADIX_TREE_MAP_SIZE)
+#define IPCMNI (1 << IPCMNI_SHIFT)
+#define IPCMNI_EXTEND (1 << IPCMNI_EXTEND_SHIFT)
+
+#ifdef CONFIG_SYSVIPC_SYSCTL
+extern int ipc_mni;
+extern int ipc_mni_shift;
+extern int ipc_min_cycle;
+
+#define ipcmni_seq_shift() ipc_mni_shift
+#define IPCMNI_IDX_MASK ((1 << ipc_mni_shift) - 1)
+
+#else /* CONFIG_SYSVIPC_SYSCTL */
+
+#define ipc_mni IPCMNI
+#define ipc_min_cycle ((int)RADIX_TREE_MAP_SIZE)
+#define ipcmni_seq_shift() IPCMNI_SHIFT
+#define IPCMNI_IDX_MASK ((1 << IPCMNI_SHIFT) - 1)
+#endif /* CONFIG_SYSVIPC_SYSCTL */
void sem_init(void);
void msg_init(void);
@@ -96,9 +125,9 @@ struct pid_namespace *ipc_seq_pid_ns(struct seq_file *);
#define IPC_MSG_IDS 1
#define IPC_SHM_IDS 2
-#define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
-#define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
-#define IPCID_SEQ_MAX min_t(int, INT_MAX/SEQ_MULTIPLIER, USHRT_MAX)
+#define ipcid_to_idx(id) ((id) & IPCMNI_IDX_MASK)
+#define ipcid_to_seqx(id) ((id) >> ipcmni_seq_shift())
+#define ipcid_seq_max() (INT_MAX >> ipcmni_seq_shift())
/* must be called with ids->rwsem acquired for writing */
int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
@@ -123,8 +152,8 @@ static inline int ipc_get_maxidx(struct ipc_ids *ids)
if (ids->in_use == 0)
return -1;
- if (ids->in_use == IPCMNI)
- return IPCMNI - 1;
+ if (ids->in_use == ipc_mni)
+ return ipc_mni - 1;
return ids->max_idx;
}
@@ -216,10 +245,10 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
static inline int sem_check_semmni(struct ipc_namespace *ns) {
/*
- * Check semmni range [0, IPCMNI]
+ * Check semmni range [0, ipc_mni]
* semmni is the last element of sem_ctls[4] array
*/
- return ((ns->sem_ctls[3] < 0) || (ns->sem_ctls[3] > IPCMNI))
+ return ((ns->sem_ctls[3] < 0) || (ns->sem_ctls[3] > ipc_mni))
? -ERANGE : 0;
}