diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-31 21:10:18 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-31 21:10:18 -0400 |
| commit | 08615d7d85e5aa02c05bf6c4dde87d940e7f85f6 (patch) | |
| tree | 18906149d313d25914160aca21cedf54b3a7e818 /ipc | |
| parent | 9fdadb2cbaf4b482dfd6086e8bd3d2db071a1702 (diff) | |
| parent | 0a4dd35c67b144d8ef9432120105f1aab9293ee9 (diff) | |
Merge branch 'akpm' (Andrew's patch-bomb)
Merge misc patches from Andrew Morton:
- the "misc" tree - stuff from all over the map
- checkpatch updates
- fatfs
- kmod changes
- procfs
- cpumask
- UML
- kexec
- mqueue
- rapidio
- pidns
- some checkpoint-restore feature work. Reluctantly. Most of it
delayed a release. I'm still rather worried that we don't have a
clear roadmap to completion for this work.
* emailed from Andrew Morton <akpm@linux-foundation.org>: (78 patches)
kconfig: update compression algorithm info
c/r: prctl: add ability to set new mm_struct::exe_file
c/r: prctl: extend PR_SET_MM to set up more mm_struct entries
c/r: procfs: add arg_start/end, env_start/end and exit_code members to /proc/$pid/stat
syscalls, x86: add __NR_kcmp syscall
fs, proc: introduce /proc/<pid>/task/<tid>/children entry
sysctl: make kernel.ns_last_pid control dependent on CHECKPOINT_RESTORE
aio/vfs: cleanup of rw_copy_check_uvector() and compat_rw_copy_check_uvector()
eventfd: change int to __u64 in eventfd_signal()
fs/nls: add Apple NLS
pidns: make killed children autoreap
pidns: use task_active_pid_ns in do_notify_parent
rapidio/tsi721: add DMA engine support
rapidio: add DMA engine support for RIO data transfers
ipc/mqueue: add rbtree node caching support
tools/selftests: add mq_perf_tests
ipc/mqueue: strengthen checks on mqueue creation
ipc/mqueue: correct mq_attr_ok test
ipc/mqueue: improve performance of send/recv
selftests: add mq_open_tests
...
Diffstat (limited to 'ipc')
| -rw-r--r-- | ipc/mq_sysctl.c | 49 | ||||
| -rw-r--r-- | ipc/mqueue.c | 292 |
2 files changed, 258 insertions, 83 deletions
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 0c09366b96f3..383d638340b8 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c | |||
| @@ -13,15 +13,6 @@ | |||
| 13 | #include <linux/ipc_namespace.h> | 13 | #include <linux/ipc_namespace.h> |
| 14 | #include <linux/sysctl.h> | 14 | #include <linux/sysctl.h> |
| 15 | 15 | ||
| 16 | /* | ||
| 17 | * Define the ranges various user-specified maximum values can | ||
| 18 | * be set to. | ||
| 19 | */ | ||
| 20 | #define MIN_MSGMAX 1 /* min value for msg_max */ | ||
| 21 | #define MAX_MSGMAX HARD_MSGMAX /* max value for msg_max */ | ||
| 22 | #define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */ | ||
| 23 | #define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */ | ||
| 24 | |||
| 25 | #ifdef CONFIG_PROC_SYSCTL | 16 | #ifdef CONFIG_PROC_SYSCTL |
| 26 | static void *get_mq(ctl_table *table) | 17 | static void *get_mq(ctl_table *table) |
| 27 | { | 18 | { |
| @@ -31,16 +22,6 @@ static void *get_mq(ctl_table *table) | |||
| 31 | return which; | 22 | return which; |
| 32 | } | 23 | } |
| 33 | 24 | ||
| 34 | static int proc_mq_dointvec(ctl_table *table, int write, | ||
| 35 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 36 | { | ||
| 37 | struct ctl_table mq_table; | ||
| 38 | memcpy(&mq_table, table, sizeof(mq_table)); | ||
| 39 | mq_table.data = get_mq(table); | ||
| 40 | |||
| 41 | return proc_dointvec(&mq_table, write, buffer, lenp, ppos); | ||
| 42 | } | ||
| 43 | |||
| 44 | static int proc_mq_dointvec_minmax(ctl_table *table, int write, | 25 | static int proc_mq_dointvec_minmax(ctl_table *table, int write, |
| 45 | void __user *buffer, size_t *lenp, loff_t *ppos) | 26 | void __user *buffer, size_t *lenp, loff_t *ppos) |
| 46 | { | 27 | { |
| @@ -52,15 +33,17 @@ static int proc_mq_dointvec_minmax(ctl_table *table, int write, | |||
| 52 | lenp, ppos); | 33 | lenp, ppos); |
| 53 | } | 34 | } |
| 54 | #else | 35 | #else |
| 55 | #define proc_mq_dointvec NULL | ||
| 56 | #define proc_mq_dointvec_minmax NULL | 36 | #define proc_mq_dointvec_minmax NULL |
| 57 | #endif | 37 | #endif |
| 58 | 38 | ||
| 39 | static int msg_queues_limit_min = MIN_QUEUESMAX; | ||
| 40 | static int msg_queues_limit_max = HARD_QUEUESMAX; | ||
| 41 | |||
| 59 | static int msg_max_limit_min = MIN_MSGMAX; | 42 | static int msg_max_limit_min = MIN_MSGMAX; |
| 60 | static int msg_max_limit_max = MAX_MSGMAX; | 43 | static int msg_max_limit_max = HARD_MSGMAX; |
| 61 | 44 | ||
| 62 | static int msg_maxsize_limit_min = MIN_MSGSIZEMAX; | 45 | static int msg_maxsize_limit_min = MIN_MSGSIZEMAX; |
| 63 | static int msg_maxsize_limit_max = MAX_MSGSIZEMAX; | 46 | static int msg_maxsize_limit_max = HARD_MSGSIZEMAX; |
| 64 | 47 | ||
| 65 | static ctl_table mq_sysctls[] = { | 48 | static ctl_table mq_sysctls[] = { |
| 66 | { | 49 | { |
| @@ -68,7 +51,9 @@ static ctl_table mq_sysctls[] = { | |||
| 68 | .data = &init_ipc_ns.mq_queues_max, | 51 | .data = &init_ipc_ns.mq_queues_max, |
| 69 | .maxlen = sizeof(int), | 52 | .maxlen = sizeof(int), |
| 70 | .mode = 0644, | 53 | .mode = 0644, |
| 71 | .proc_handler = proc_mq_dointvec, | 54 | .proc_handler = proc_mq_dointvec_minmax, |
| 55 | .extra1 = &msg_queues_limit_min, | ||
| 56 | .extra2 = &msg_queues_limit_max, | ||
| 72 | }, | 57 | }, |
| 73 | { | 58 | { |
| 74 | .procname = "msg_max", | 59 | .procname = "msg_max", |
| @@ -88,6 +73,24 @@ static ctl_table mq_sysctls[] = { | |||
| 88 | .extra1 = &msg_maxsize_limit_min, | 73 | .extra1 = &msg_maxsize_limit_min, |
| 89 | .extra2 = &msg_maxsize_limit_max, | 74 | .extra2 = &msg_maxsize_limit_max, |
| 90 | }, | 75 | }, |
| 76 | { | ||
| 77 | .procname = "msg_default", | ||
| 78 | .data = &init_ipc_ns.mq_msg_default, | ||
| 79 | .maxlen = sizeof(int), | ||
| 80 | .mode = 0644, | ||
| 81 | .proc_handler = proc_mq_dointvec_minmax, | ||
| 82 | .extra1 = &msg_max_limit_min, | ||
| 83 | .extra2 = &msg_max_limit_max, | ||
| 84 | }, | ||
| 85 | { | ||
| 86 | .procname = "msgsize_default", | ||
| 87 | .data = &init_ipc_ns.mq_msgsize_default, | ||
| 88 | .maxlen = sizeof(int), | ||
| 89 | .mode = 0644, | ||
| 90 | .proc_handler = proc_mq_dointvec_minmax, | ||
| 91 | .extra1 = &msg_maxsize_limit_min, | ||
| 92 | .extra2 = &msg_maxsize_limit_max, | ||
| 93 | }, | ||
| 91 | {} | 94 | {} |
| 92 | }; | 95 | }; |
| 93 | 96 | ||
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index a2757d4ab773..8ce57691e7b6 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/mqueue.h> | 24 | #include <linux/mqueue.h> |
| 25 | #include <linux/msg.h> | 25 | #include <linux/msg.h> |
| 26 | #include <linux/skbuff.h> | 26 | #include <linux/skbuff.h> |
| 27 | #include <linux/vmalloc.h> | ||
| 27 | #include <linux/netlink.h> | 28 | #include <linux/netlink.h> |
| 28 | #include <linux/syscalls.h> | 29 | #include <linux/syscalls.h> |
| 29 | #include <linux/audit.h> | 30 | #include <linux/audit.h> |
| @@ -49,6 +50,12 @@ | |||
| 49 | #define STATE_PENDING 1 | 50 | #define STATE_PENDING 1 |
| 50 | #define STATE_READY 2 | 51 | #define STATE_READY 2 |
| 51 | 52 | ||
| 53 | struct posix_msg_tree_node { | ||
| 54 | struct rb_node rb_node; | ||
| 55 | struct list_head msg_list; | ||
| 56 | int priority; | ||
| 57 | }; | ||
| 58 | |||
| 52 | struct ext_wait_queue { /* queue of sleeping tasks */ | 59 | struct ext_wait_queue { /* queue of sleeping tasks */ |
| 53 | struct task_struct *task; | 60 | struct task_struct *task; |
| 54 | struct list_head list; | 61 | struct list_head list; |
| @@ -61,7 +68,8 @@ struct mqueue_inode_info { | |||
| 61 | struct inode vfs_inode; | 68 | struct inode vfs_inode; |
| 62 | wait_queue_head_t wait_q; | 69 | wait_queue_head_t wait_q; |
| 63 | 70 | ||
| 64 | struct msg_msg **messages; | 71 | struct rb_root msg_tree; |
| 72 | struct posix_msg_tree_node *node_cache; | ||
| 65 | struct mq_attr attr; | 73 | struct mq_attr attr; |
| 66 | 74 | ||
| 67 | struct sigevent notify; | 75 | struct sigevent notify; |
| @@ -109,6 +117,103 @@ static struct ipc_namespace *get_ns_from_inode(struct inode *inode) | |||
| 109 | return ns; | 117 | return ns; |
| 110 | } | 118 | } |
| 111 | 119 | ||
| 120 | /* Auxiliary functions to manipulate messages' list */ | ||
| 121 | static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info) | ||
| 122 | { | ||
| 123 | struct rb_node **p, *parent = NULL; | ||
| 124 | struct posix_msg_tree_node *leaf; | ||
| 125 | |||
| 126 | p = &info->msg_tree.rb_node; | ||
| 127 | while (*p) { | ||
| 128 | parent = *p; | ||
| 129 | leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node); | ||
| 130 | |||
| 131 | if (likely(leaf->priority == msg->m_type)) | ||
| 132 | goto insert_msg; | ||
| 133 | else if (msg->m_type < leaf->priority) | ||
| 134 | p = &(*p)->rb_left; | ||
| 135 | else | ||
| 136 | p = &(*p)->rb_right; | ||
| 137 | } | ||
| 138 | if (info->node_cache) { | ||
| 139 | leaf = info->node_cache; | ||
| 140 | info->node_cache = NULL; | ||
| 141 | } else { | ||
| 142 | leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC); | ||
| 143 | if (!leaf) | ||
| 144 | return -ENOMEM; | ||
| 145 | rb_init_node(&leaf->rb_node); | ||
| 146 | INIT_LIST_HEAD(&leaf->msg_list); | ||
| 147 | info->qsize += sizeof(*leaf); | ||
| 148 | } | ||
| 149 | leaf->priority = msg->m_type; | ||
| 150 | rb_link_node(&leaf->rb_node, parent, p); | ||
| 151 | rb_insert_color(&leaf->rb_node, &info->msg_tree); | ||
| 152 | insert_msg: | ||
| 153 | info->attr.mq_curmsgs++; | ||
| 154 | info->qsize += msg->m_ts; | ||
| 155 | list_add_tail(&msg->m_list, &leaf->msg_list); | ||
| 156 | return 0; | ||
| 157 | } | ||
| 158 | |||
| 159 | static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) | ||
| 160 | { | ||
| 161 | struct rb_node **p, *parent = NULL; | ||
| 162 | struct posix_msg_tree_node *leaf; | ||
| 163 | struct msg_msg *msg; | ||
| 164 | |||
| 165 | try_again: | ||
| 166 | p = &info->msg_tree.rb_node; | ||
| 167 | while (*p) { | ||
| 168 | parent = *p; | ||
| 169 | /* | ||
| 170 | * During insert, low priorities go to the left and high to the | ||
| 171 | * right. On receive, we want the highest priorities first, so | ||
| 172 | * walk all the way to the right. | ||
| 173 | */ | ||
| 174 | p = &(*p)->rb_right; | ||
| 175 | } | ||
| 176 | if (!parent) { | ||
| 177 | if (info->attr.mq_curmsgs) { | ||
| 178 | pr_warn_once("Inconsistency in POSIX message queue, " | ||
| 179 | "no tree element, but supposedly messages " | ||
| 180 | "should exist!\n"); | ||
| 181 | info->attr.mq_curmsgs = 0; | ||
| 182 | } | ||
| 183 | return NULL; | ||
| 184 | } | ||
| 185 | leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node); | ||
| 186 | if (unlikely(list_empty(&leaf->msg_list))) { | ||
| 187 | pr_warn_once("Inconsistency in POSIX message queue, " | ||
| 188 | "empty leaf node but we haven't implemented " | ||
| 189 | "lazy leaf delete!\n"); | ||
| 190 | rb_erase(&leaf->rb_node, &info->msg_tree); | ||
| 191 | if (info->node_cache) { | ||
| 192 | info->qsize -= sizeof(*leaf); | ||
| 193 | kfree(leaf); | ||
| 194 | } else { | ||
| 195 | info->node_cache = leaf; | ||
| 196 | } | ||
| 197 | goto try_again; | ||
| 198 | } else { | ||
| 199 | msg = list_first_entry(&leaf->msg_list, | ||
| 200 | struct msg_msg, m_list); | ||
| 201 | list_del(&msg->m_list); | ||
| 202 | if (list_empty(&leaf->msg_list)) { | ||
| 203 | rb_erase(&leaf->rb_node, &info->msg_tree); | ||
| 204 | if (info->node_cache) { | ||
| 205 | info->qsize -= sizeof(*leaf); | ||
| 206 | kfree(leaf); | ||
| 207 | } else { | ||
| 208 | info->node_cache = leaf; | ||
| 209 | } | ||
| 210 | } | ||
| 211 | } | ||
| 212 | info->attr.mq_curmsgs--; | ||
| 213 | info->qsize -= msg->m_ts; | ||
| 214 | return msg; | ||
| 215 | } | ||
| 216 | |||
| 112 | static struct inode *mqueue_get_inode(struct super_block *sb, | 217 | static struct inode *mqueue_get_inode(struct super_block *sb, |
| 113 | struct ipc_namespace *ipc_ns, umode_t mode, | 218 | struct ipc_namespace *ipc_ns, umode_t mode, |
| 114 | struct mq_attr *attr) | 219 | struct mq_attr *attr) |
| @@ -129,7 +234,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, | |||
| 129 | 234 | ||
| 130 | if (S_ISREG(mode)) { | 235 | if (S_ISREG(mode)) { |
| 131 | struct mqueue_inode_info *info; | 236 | struct mqueue_inode_info *info; |
| 132 | unsigned long mq_bytes, mq_msg_tblsz; | 237 | unsigned long mq_bytes, mq_treesize; |
| 133 | 238 | ||
| 134 | inode->i_fop = &mqueue_file_operations; | 239 | inode->i_fop = &mqueue_file_operations; |
| 135 | inode->i_size = FILENT_SIZE; | 240 | inode->i_size = FILENT_SIZE; |
| @@ -143,20 +248,36 @@ static struct inode *mqueue_get_inode(struct super_block *sb, | |||
| 143 | info->notify_user_ns = NULL; | 248 | info->notify_user_ns = NULL; |
| 144 | info->qsize = 0; | 249 | info->qsize = 0; |
| 145 | info->user = NULL; /* set when all is ok */ | 250 | info->user = NULL; /* set when all is ok */ |
| 251 | info->msg_tree = RB_ROOT; | ||
| 252 | info->node_cache = NULL; | ||
| 146 | memset(&info->attr, 0, sizeof(info->attr)); | 253 | memset(&info->attr, 0, sizeof(info->attr)); |
| 147 | info->attr.mq_maxmsg = ipc_ns->mq_msg_max; | 254 | info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, |
| 148 | info->attr.mq_msgsize = ipc_ns->mq_msgsize_max; | 255 | ipc_ns->mq_msg_default); |
| 256 | info->attr.mq_msgsize = min(ipc_ns->mq_msgsize_max, | ||
| 257 | ipc_ns->mq_msgsize_default); | ||
| 149 | if (attr) { | 258 | if (attr) { |
| 150 | info->attr.mq_maxmsg = attr->mq_maxmsg; | 259 | info->attr.mq_maxmsg = attr->mq_maxmsg; |
| 151 | info->attr.mq_msgsize = attr->mq_msgsize; | 260 | info->attr.mq_msgsize = attr->mq_msgsize; |
| 152 | } | 261 | } |
| 153 | mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *); | 262 | /* |
| 154 | info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); | 263 | * We used to allocate a static array of pointers and account |
| 155 | if (!info->messages) | 264 | * the size of that array as well as one msg_msg struct per |
| 156 | goto out_inode; | 265 | * possible message into the queue size. That's no longer |
| 266 | * accurate as the queue is now an rbtree and will grow and | ||
| 267 | * shrink depending on usage patterns. We can, however, still | ||
| 268 | * account one msg_msg struct per message, but the nodes are | ||
| 269 | * allocated depending on priority usage, and most programs | ||
| 270 | * only use one, or a handful, of priorities. However, since | ||
| 271 | * this is pinned memory, we need to assume worst case, so | ||
| 272 | * that means the min(mq_maxmsg, max_priorities) * struct | ||
| 273 | * posix_msg_tree_node. | ||
| 274 | */ | ||
| 275 | mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + | ||
| 276 | min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * | ||
| 277 | sizeof(struct posix_msg_tree_node); | ||
| 157 | 278 | ||
| 158 | mq_bytes = (mq_msg_tblsz + | 279 | mq_bytes = mq_treesize + (info->attr.mq_maxmsg * |
| 159 | (info->attr.mq_maxmsg * info->attr.mq_msgsize)); | 280 | info->attr.mq_msgsize); |
| 160 | 281 | ||
| 161 | spin_lock(&mq_lock); | 282 | spin_lock(&mq_lock); |
| 162 | if (u->mq_bytes + mq_bytes < u->mq_bytes || | 283 | if (u->mq_bytes + mq_bytes < u->mq_bytes || |
| @@ -247,9 +368,9 @@ static void mqueue_evict_inode(struct inode *inode) | |||
| 247 | { | 368 | { |
| 248 | struct mqueue_inode_info *info; | 369 | struct mqueue_inode_info *info; |
| 249 | struct user_struct *user; | 370 | struct user_struct *user; |
| 250 | unsigned long mq_bytes; | 371 | unsigned long mq_bytes, mq_treesize; |
| 251 | int i; | ||
| 252 | struct ipc_namespace *ipc_ns; | 372 | struct ipc_namespace *ipc_ns; |
| 373 | struct msg_msg *msg; | ||
| 253 | 374 | ||
| 254 | clear_inode(inode); | 375 | clear_inode(inode); |
| 255 | 376 | ||
| @@ -259,14 +380,19 @@ static void mqueue_evict_inode(struct inode *inode) | |||
| 259 | ipc_ns = get_ns_from_inode(inode); | 380 | ipc_ns = get_ns_from_inode(inode); |
| 260 | info = MQUEUE_I(inode); | 381 | info = MQUEUE_I(inode); |
| 261 | spin_lock(&info->lock); | 382 | spin_lock(&info->lock); |
| 262 | for (i = 0; i < info->attr.mq_curmsgs; i++) | 383 | while ((msg = msg_get(info)) != NULL) |
| 263 | free_msg(info->messages[i]); | 384 | free_msg(msg); |
| 264 | kfree(info->messages); | 385 | kfree(info->node_cache); |
| 265 | spin_unlock(&info->lock); | 386 | spin_unlock(&info->lock); |
| 266 | 387 | ||
| 267 | /* Total amount of bytes accounted for the mqueue */ | 388 | /* Total amount of bytes accounted for the mqueue */ |
| 268 | mq_bytes = info->attr.mq_maxmsg * (sizeof(struct msg_msg *) | 389 | mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + |
| 269 | + info->attr.mq_msgsize); | 390 | min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * |
| 391 | sizeof(struct posix_msg_tree_node); | ||
| 392 | |||
| 393 | mq_bytes = mq_treesize + (info->attr.mq_maxmsg * | ||
| 394 | info->attr.mq_msgsize); | ||
| 395 | |||
| 270 | user = info->user; | 396 | user = info->user; |
| 271 | if (user) { | 397 | if (user) { |
| 272 | spin_lock(&mq_lock); | 398 | spin_lock(&mq_lock); |
| @@ -300,8 +426,9 @@ static int mqueue_create(struct inode *dir, struct dentry *dentry, | |||
| 300 | error = -EACCES; | 426 | error = -EACCES; |
| 301 | goto out_unlock; | 427 | goto out_unlock; |
| 302 | } | 428 | } |
| 303 | if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max && | 429 | if (ipc_ns->mq_queues_count >= HARD_QUEUESMAX || |
| 304 | !capable(CAP_SYS_RESOURCE)) { | 430 | (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max && |
| 431 | !capable(CAP_SYS_RESOURCE))) { | ||
| 305 | error = -ENOSPC; | 432 | error = -ENOSPC; |
| 306 | goto out_unlock; | 433 | goto out_unlock; |
| 307 | } | 434 | } |
| @@ -485,26 +612,6 @@ static struct ext_wait_queue *wq_get_first_waiter( | |||
| 485 | return list_entry(ptr, struct ext_wait_queue, list); | 612 | return list_entry(ptr, struct ext_wait_queue, list); |
| 486 | } | 613 | } |
| 487 | 614 | ||
| 488 | /* Auxiliary functions to manipulate messages' list */ | ||
| 489 | static void msg_insert(struct msg_msg *ptr, struct mqueue_inode_info *info) | ||
| 490 | { | ||
| 491 | int k; | ||
| 492 | |||
| 493 | k = info->attr.mq_curmsgs - 1; | ||
| 494 | while (k >= 0 && info->messages[k]->m_type >= ptr->m_type) { | ||
| 495 | info->messages[k + 1] = info->messages[k]; | ||
| 496 | k--; | ||
| 497 | } | ||
| 498 | info->attr.mq_curmsgs++; | ||
| 499 | info->qsize += ptr->m_ts; | ||
| 500 | info->messages[k + 1] = ptr; | ||
| 501 | } | ||
| 502 | |||
| 503 | static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) | ||
| 504 | { | ||
| 505 | info->qsize -= info->messages[--info->attr.mq_curmsgs]->m_ts; | ||
| 506 | return info->messages[info->attr.mq_curmsgs]; | ||
| 507 | } | ||
| 508 | 615 | ||
| 509 | static inline void set_cookie(struct sk_buff *skb, char code) | 616 | static inline void set_cookie(struct sk_buff *skb, char code) |
| 510 | { | 617 | { |
| @@ -585,24 +692,30 @@ static void remove_notification(struct mqueue_inode_info *info) | |||
| 585 | 692 | ||
| 586 | static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) | 693 | static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) |
| 587 | { | 694 | { |
| 695 | int mq_treesize; | ||
| 696 | unsigned long total_size; | ||
| 697 | |||
| 588 | if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) | 698 | if (attr->mq_maxmsg <= 0 || attr->mq_msgsize <= 0) |
| 589 | return 0; | 699 | return -EINVAL; |
| 590 | if (capable(CAP_SYS_RESOURCE)) { | 700 | if (capable(CAP_SYS_RESOURCE)) { |
| 591 | if (attr->mq_maxmsg > HARD_MSGMAX) | 701 | if (attr->mq_maxmsg > HARD_MSGMAX || |
| 592 | return 0; | 702 | attr->mq_msgsize > HARD_MSGSIZEMAX) |
| 703 | return -EINVAL; | ||
| 593 | } else { | 704 | } else { |
| 594 | if (attr->mq_maxmsg > ipc_ns->mq_msg_max || | 705 | if (attr->mq_maxmsg > ipc_ns->mq_msg_max || |
| 595 | attr->mq_msgsize > ipc_ns->mq_msgsize_max) | 706 | attr->mq_msgsize > ipc_ns->mq_msgsize_max) |
| 596 | return 0; | 707 | return -EINVAL; |
| 597 | } | 708 | } |
| 598 | /* check for overflow */ | 709 | /* check for overflow */ |
| 599 | if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg) | 710 | if (attr->mq_msgsize > ULONG_MAX/attr->mq_maxmsg) |
| 600 | return 0; | 711 | return -EOVERFLOW; |
| 601 | if ((unsigned long)(attr->mq_maxmsg * (attr->mq_msgsize | 712 | mq_treesize = attr->mq_maxmsg * sizeof(struct msg_msg) + |
| 602 | + sizeof (struct msg_msg *))) < | 713 | min_t(unsigned int, attr->mq_maxmsg, MQ_PRIO_MAX) * |
| 603 | (unsigned long)(attr->mq_maxmsg * attr->mq_msgsize)) | 714 | sizeof(struct posix_msg_tree_node); |
| 604 | return 0; | 715 | total_size = attr->mq_maxmsg * attr->mq_msgsize; |
| 605 | return 1; | 716 | if (total_size + mq_treesize < total_size) |
| 717 | return -EOVERFLOW; | ||
| 718 | return 0; | ||
| 606 | } | 719 | } |
| 607 | 720 | ||
| 608 | /* | 721 | /* |
| @@ -617,12 +730,21 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, | |||
| 617 | int ret; | 730 | int ret; |
| 618 | 731 | ||
| 619 | if (attr) { | 732 | if (attr) { |
| 620 | if (!mq_attr_ok(ipc_ns, attr)) { | 733 | ret = mq_attr_ok(ipc_ns, attr); |
| 621 | ret = -EINVAL; | 734 | if (ret) |
| 622 | goto out; | 735 | goto out; |
| 623 | } | ||
| 624 | /* store for use during create */ | 736 | /* store for use during create */ |
| 625 | dentry->d_fsdata = attr; | 737 | dentry->d_fsdata = attr; |
| 738 | } else { | ||
| 739 | struct mq_attr def_attr; | ||
| 740 | |||
| 741 | def_attr.mq_maxmsg = min(ipc_ns->mq_msg_max, | ||
| 742 | ipc_ns->mq_msg_default); | ||
| 743 | def_attr.mq_msgsize = min(ipc_ns->mq_msgsize_max, | ||
| 744 | ipc_ns->mq_msgsize_default); | ||
| 745 | ret = mq_attr_ok(ipc_ns, &def_attr); | ||
| 746 | if (ret) | ||
| 747 | goto out; | ||
| 626 | } | 748 | } |
| 627 | 749 | ||
| 628 | mode &= ~current_umask(); | 750 | mode &= ~current_umask(); |
| @@ -837,7 +959,8 @@ static inline void pipelined_receive(struct mqueue_inode_info *info) | |||
| 837 | wake_up_interruptible(&info->wait_q); | 959 | wake_up_interruptible(&info->wait_q); |
| 838 | return; | 960 | return; |
| 839 | } | 961 | } |
| 840 | msg_insert(sender->msg, info); | 962 | if (msg_insert(sender->msg, info)) |
| 963 | return; | ||
| 841 | list_del(&sender->list); | 964 | list_del(&sender->list); |
| 842 | sender->state = STATE_PENDING; | 965 | sender->state = STATE_PENDING; |
| 843 | wake_up_process(sender->task); | 966 | wake_up_process(sender->task); |
| @@ -857,7 +980,8 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, | |||
| 857 | struct mqueue_inode_info *info; | 980 | struct mqueue_inode_info *info; |
| 858 | ktime_t expires, *timeout = NULL; | 981 | ktime_t expires, *timeout = NULL; |
| 859 | struct timespec ts; | 982 | struct timespec ts; |
| 860 | int ret; | 983 | struct posix_msg_tree_node *new_leaf = NULL; |
| 984 | int ret = 0; | ||
| 861 | 985 | ||
| 862 | if (u_abs_timeout) { | 986 | if (u_abs_timeout) { |
| 863 | int res = prepare_timeout(u_abs_timeout, &expires, &ts); | 987 | int res = prepare_timeout(u_abs_timeout, &expires, &ts); |
| @@ -905,34 +1029,60 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, | |||
| 905 | msg_ptr->m_ts = msg_len; | 1029 | msg_ptr->m_ts = msg_len; |
| 906 | msg_ptr->m_type = msg_prio; | 1030 | msg_ptr->m_type = msg_prio; |
| 907 | 1031 | ||
| 1032 | /* | ||
| 1033 | * msg_insert really wants us to have a valid, spare node struct so | ||
| 1034 | * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will | ||
| 1035 | * fall back to that if necessary. | ||
| 1036 | */ | ||
| 1037 | if (!info->node_cache) | ||
| 1038 | new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL); | ||
| 1039 | |||
| 908 | spin_lock(&info->lock); | 1040 | spin_lock(&info->lock); |
| 909 | 1041 | ||
| 1042 | if (!info->node_cache && new_leaf) { | ||
| 1043 | /* Save our speculative allocation into the cache */ | ||
| 1044 | rb_init_node(&new_leaf->rb_node); | ||
| 1045 | INIT_LIST_HEAD(&new_leaf->msg_list); | ||
| 1046 | info->node_cache = new_leaf; | ||
| 1047 | info->qsize += sizeof(*new_leaf); | ||
| 1048 | new_leaf = NULL; | ||
| 1049 | } else { | ||
| 1050 | kfree(new_leaf); | ||
| 1051 | } | ||
| 1052 | |||
| 910 | if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) { | 1053 | if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) { |
| 911 | if (filp->f_flags & O_NONBLOCK) { | 1054 | if (filp->f_flags & O_NONBLOCK) { |
| 912 | spin_unlock(&info->lock); | ||
| 913 | ret = -EAGAIN; | 1055 | ret = -EAGAIN; |
| 914 | } else { | 1056 | } else { |
| 915 | wait.task = current; | 1057 | wait.task = current; |
| 916 | wait.msg = (void *) msg_ptr; | 1058 | wait.msg = (void *) msg_ptr; |
| 917 | wait.state = STATE_NONE; | 1059 | wait.state = STATE_NONE; |
| 918 | ret = wq_sleep(info, SEND, timeout, &wait); | 1060 | ret = wq_sleep(info, SEND, timeout, &wait); |
| 1061 | /* | ||
| 1062 | * wq_sleep must be called with info->lock held, and | ||
| 1063 | * returns with the lock released | ||
| 1064 | */ | ||
| 1065 | goto out_free; | ||
| 919 | } | 1066 | } |
| 920 | if (ret < 0) | ||
| 921 | free_msg(msg_ptr); | ||
| 922 | } else { | 1067 | } else { |
| 923 | receiver = wq_get_first_waiter(info, RECV); | 1068 | receiver = wq_get_first_waiter(info, RECV); |
| 924 | if (receiver) { | 1069 | if (receiver) { |
| 925 | pipelined_send(info, msg_ptr, receiver); | 1070 | pipelined_send(info, msg_ptr, receiver); |
| 926 | } else { | 1071 | } else { |
| 927 | /* adds message to the queue */ | 1072 | /* adds message to the queue */ |
| 928 | msg_insert(msg_ptr, info); | 1073 | ret = msg_insert(msg_ptr, info); |
| 1074 | if (ret) | ||
| 1075 | goto out_unlock; | ||
| 929 | __do_notify(info); | 1076 | __do_notify(info); |
| 930 | } | 1077 | } |
| 931 | inode->i_atime = inode->i_mtime = inode->i_ctime = | 1078 | inode->i_atime = inode->i_mtime = inode->i_ctime = |
| 932 | CURRENT_TIME; | 1079 | CURRENT_TIME; |
| 933 | spin_unlock(&info->lock); | ||
| 934 | ret = 0; | ||
| 935 | } | 1080 | } |
| 1081 | out_unlock: | ||
| 1082 | spin_unlock(&info->lock); | ||
| 1083 | out_free: | ||
| 1084 | if (ret) | ||
| 1085 | free_msg(msg_ptr); | ||
| 936 | out_fput: | 1086 | out_fput: |
| 937 | fput(filp); | 1087 | fput(filp); |
| 938 | out: | 1088 | out: |
| @@ -951,6 +1101,7 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, | |||
| 951 | struct ext_wait_queue wait; | 1101 | struct ext_wait_queue wait; |
| 952 | ktime_t expires, *timeout = NULL; | 1102 | ktime_t expires, *timeout = NULL; |
| 953 | struct timespec ts; | 1103 | struct timespec ts; |
| 1104 | struct posix_msg_tree_node *new_leaf = NULL; | ||
| 954 | 1105 | ||
| 955 | if (u_abs_timeout) { | 1106 | if (u_abs_timeout) { |
| 956 | int res = prepare_timeout(u_abs_timeout, &expires, &ts); | 1107 | int res = prepare_timeout(u_abs_timeout, &expires, &ts); |
| @@ -986,7 +1137,26 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, | |||
| 986 | goto out_fput; | 1137 | goto out_fput; |
| 987 | } | 1138 | } |
| 988 | 1139 | ||
| 1140 | /* | ||
| 1141 | * msg_insert really wants us to have a valid, spare node struct so | ||
| 1142 | * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will | ||
| 1143 | * fall back to that if necessary. | ||
| 1144 | */ | ||
| 1145 | if (!info->node_cache) | ||
| 1146 | new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL); | ||
| 1147 | |||
| 989 | spin_lock(&info->lock); | 1148 | spin_lock(&info->lock); |
| 1149 | |||
| 1150 | if (!info->node_cache && new_leaf) { | ||
| 1151 | /* Save our speculative allocation into the cache */ | ||
| 1152 | rb_init_node(&new_leaf->rb_node); | ||
| 1153 | INIT_LIST_HEAD(&new_leaf->msg_list); | ||
| 1154 | info->node_cache = new_leaf; | ||
| 1155 | info->qsize += sizeof(*new_leaf); | ||
| 1156 | } else { | ||
| 1157 | kfree(new_leaf); | ||
| 1158 | } | ||
| 1159 | |||
| 990 | if (info->attr.mq_curmsgs == 0) { | 1160 | if (info->attr.mq_curmsgs == 0) { |
| 991 | if (filp->f_flags & O_NONBLOCK) { | 1161 | if (filp->f_flags & O_NONBLOCK) { |
| 992 | spin_unlock(&info->lock); | 1162 | spin_unlock(&info->lock); |
| @@ -1251,6 +1421,8 @@ int mq_init_ns(struct ipc_namespace *ns) | |||
| 1251 | ns->mq_queues_max = DFLT_QUEUESMAX; | 1421 | ns->mq_queues_max = DFLT_QUEUESMAX; |
| 1252 | ns->mq_msg_max = DFLT_MSGMAX; | 1422 | ns->mq_msg_max = DFLT_MSGMAX; |
| 1253 | ns->mq_msgsize_max = DFLT_MSGSIZEMAX; | 1423 | ns->mq_msgsize_max = DFLT_MSGSIZEMAX; |
| 1424 | ns->mq_msg_default = DFLT_MSG; | ||
| 1425 | ns->mq_msgsize_default = DFLT_MSGSIZE; | ||
| 1254 | 1426 | ||
| 1255 | ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); | 1427 | ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns); |
| 1256 | if (IS_ERR(ns->mq_mnt)) { | 1428 | if (IS_ERR(ns->mq_mnt)) { |
