diff options
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/mqueue.c | 173 |
1 files changed, 130 insertions, 43 deletions
diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 609d53f7a915..3c72a05dc326 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c | |||
@@ -50,6 +50,12 @@ | |||
50 | #define STATE_PENDING 1 | 50 | #define STATE_PENDING 1 |
51 | #define STATE_READY 2 | 51 | #define STATE_READY 2 |
52 | 52 | ||
53 | struct posix_msg_tree_node { | ||
54 | struct rb_node rb_node; | ||
55 | struct list_head msg_list; | ||
56 | int priority; | ||
57 | }; | ||
58 | |||
53 | struct ext_wait_queue { /* queue of sleeping tasks */ | 59 | struct ext_wait_queue { /* queue of sleeping tasks */ |
54 | struct task_struct *task; | 60 | struct task_struct *task; |
55 | struct list_head list; | 61 | struct list_head list; |
@@ -62,7 +68,7 @@ struct mqueue_inode_info { | |||
62 | struct inode vfs_inode; | 68 | struct inode vfs_inode; |
63 | wait_queue_head_t wait_q; | 69 | wait_queue_head_t wait_q; |
64 | 70 | ||
65 | struct msg_msg **messages; | 71 | struct rb_root msg_tree; |
66 | struct mq_attr attr; | 72 | struct mq_attr attr; |
67 | 73 | ||
68 | struct sigevent notify; | 74 | struct sigevent notify; |
@@ -110,6 +116,90 @@ static struct ipc_namespace *get_ns_from_inode(struct inode *inode) | |||
110 | return ns; | 116 | return ns; |
111 | } | 117 | } |
112 | 118 | ||
119 | /* Auxiliary functions to manipulate messages' list */ | ||
120 | static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info) | ||
121 | { | ||
122 | struct rb_node **p, *parent = NULL; | ||
123 | struct posix_msg_tree_node *leaf; | ||
124 | |||
125 | p = &info->msg_tree.rb_node; | ||
126 | while (*p) { | ||
127 | parent = *p; | ||
128 | leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node); | ||
129 | |||
130 | if (likely(leaf->priority == msg->m_type)) | ||
131 | goto insert_msg; | ||
132 | else if (msg->m_type < leaf->priority) | ||
133 | p = &(*p)->rb_left; | ||
134 | else | ||
135 | p = &(*p)->rb_right; | ||
136 | } | ||
137 | leaf = kzalloc(sizeof(*leaf), GFP_ATOMIC); | ||
138 | if (!leaf) | ||
139 | return -ENOMEM; | ||
140 | rb_init_node(&leaf->rb_node); | ||
141 | INIT_LIST_HEAD(&leaf->msg_list); | ||
142 | leaf->priority = msg->m_type; | ||
143 | rb_link_node(&leaf->rb_node, parent, p); | ||
144 | rb_insert_color(&leaf->rb_node, &info->msg_tree); | ||
145 | info->qsize += sizeof(struct posix_msg_tree_node); | ||
146 | insert_msg: | ||
147 | info->attr.mq_curmsgs++; | ||
148 | info->qsize += msg->m_ts; | ||
149 | list_add_tail(&msg->m_list, &leaf->msg_list); | ||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) | ||
154 | { | ||
155 | struct rb_node **p, *parent = NULL; | ||
156 | struct posix_msg_tree_node *leaf; | ||
157 | struct msg_msg *msg; | ||
158 | |||
159 | try_again: | ||
160 | p = &info->msg_tree.rb_node; | ||
161 | while (*p) { | ||
162 | parent = *p; | ||
163 | /* | ||
164 | * During insert, low priorities go to the left and high to the | ||
165 | * right. On receive, we want the highest priorities first, so | ||
166 | * walk all the way to the right. | ||
167 | */ | ||
168 | p = &(*p)->rb_right; | ||
169 | } | ||
170 | if (!parent) { | ||
171 | if (info->attr.mq_curmsgs) { | ||
172 | pr_warn_once("Inconsistency in POSIX message queue, " | ||
173 | "no tree element, but supposedly messages " | ||
174 | "should exist!\n"); | ||
175 | info->attr.mq_curmsgs = 0; | ||
176 | } | ||
177 | return NULL; | ||
178 | } | ||
179 | leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node); | ||
180 | if (list_empty(&leaf->msg_list)) { | ||
181 | pr_warn_once("Inconsistency in POSIX message queue, " | ||
182 | "empty leaf node but we haven't implemented " | ||
183 | "lazy leaf delete!\n"); | ||
184 | rb_erase(&leaf->rb_node, &info->msg_tree); | ||
185 | info->qsize -= sizeof(struct posix_msg_tree_node); | ||
186 | kfree(leaf); | ||
187 | goto try_again; | ||
188 | } else { | ||
189 | msg = list_first_entry(&leaf->msg_list, | ||
190 | struct msg_msg, m_list); | ||
191 | list_del(&msg->m_list); | ||
192 | if (list_empty(&leaf->msg_list)) { | ||
193 | rb_erase(&leaf->rb_node, &info->msg_tree); | ||
194 | info->qsize -= sizeof(struct posix_msg_tree_node); | ||
195 | kfree(leaf); | ||
196 | } | ||
197 | } | ||
198 | info->attr.mq_curmsgs--; | ||
199 | info->qsize -= msg->m_ts; | ||
200 | return msg; | ||
201 | } | ||
202 | |||
113 | static struct inode *mqueue_get_inode(struct super_block *sb, | 203 | static struct inode *mqueue_get_inode(struct super_block *sb, |
114 | struct ipc_namespace *ipc_ns, umode_t mode, | 204 | struct ipc_namespace *ipc_ns, umode_t mode, |
115 | struct mq_attr *attr) | 205 | struct mq_attr *attr) |
@@ -130,7 +220,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, | |||
130 | 220 | ||
131 | if (S_ISREG(mode)) { | 221 | if (S_ISREG(mode)) { |
132 | struct mqueue_inode_info *info; | 222 | struct mqueue_inode_info *info; |
133 | unsigned long mq_bytes, mq_msg_tblsz; | 223 | unsigned long mq_bytes, mq_treesize; |
134 | 224 | ||
135 | inode->i_fop = &mqueue_file_operations; | 225 | inode->i_fop = &mqueue_file_operations; |
136 | inode->i_size = FILENT_SIZE; | 226 | inode->i_size = FILENT_SIZE; |
@@ -144,6 +234,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, | |||
144 | info->notify_user_ns = NULL; | 234 | info->notify_user_ns = NULL; |
145 | info->qsize = 0; | 235 | info->qsize = 0; |
146 | info->user = NULL; /* set when all is ok */ | 236 | info->user = NULL; /* set when all is ok */ |
237 | info->msg_tree = RB_ROOT; | ||
147 | memset(&info->attr, 0, sizeof(info->attr)); | 238 | memset(&info->attr, 0, sizeof(info->attr)); |
148 | info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, | 239 | info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, |
149 | ipc_ns->mq_msg_default); | 240 | ipc_ns->mq_msg_default); |
@@ -153,16 +244,25 @@ static struct inode *mqueue_get_inode(struct super_block *sb, | |||
153 | info->attr.mq_maxmsg = attr->mq_maxmsg; | 244 | info->attr.mq_maxmsg = attr->mq_maxmsg; |
154 | info->attr.mq_msgsize = attr->mq_msgsize; | 245 | info->attr.mq_msgsize = attr->mq_msgsize; |
155 | } | 246 | } |
156 | mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *); | 247 | /* |
157 | if (mq_msg_tblsz > PAGE_SIZE) | 248 | * We used to allocate a static array of pointers and account |
158 | info->messages = vmalloc(mq_msg_tblsz); | 249 | * the size of that array as well as one msg_msg struct per |
159 | else | 250 | * possible message into the queue size. That's no longer |
160 | info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL); | 251 | * accurate as the queue is now an rbtree and will grow and |
161 | if (!info->messages) | 252 | * shrink depending on usage patterns. We can, however, still |
162 | goto out_inode; | 253 | * account one msg_msg struct per message, but the nodes are |
254 | * allocated depending on priority usage, and most programs | ||
255 | * only use one, or a handful, of priorities. However, since | ||
256 | * this is pinned memory, we need to assume worst case, so | ||
257 | * that means the min(mq_maxmsg, max_priorities) * struct | ||
258 | * posix_msg_tree_node. | ||
259 | */ | ||
260 | mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + | ||
261 | min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * | ||
262 | sizeof(struct posix_msg_tree_node); | ||
163 | 263 | ||
164 | mq_bytes = (mq_msg_tblsz + | 264 | mq_bytes = mq_treesize + (info->attr.mq_maxmsg * |
165 | (info->attr.mq_maxmsg * info->attr.mq_msgsize)); | 265 | info->attr.mq_msgsize); |
166 | 266 | ||
167 | spin_lock(&mq_lock); | 267 | spin_lock(&mq_lock); |
168 | if (u->mq_bytes + mq_bytes < u->mq_bytes || | 268 | if (u->mq_bytes + mq_bytes < u->mq_bytes || |
@@ -253,9 +353,9 @@ static void mqueue_evict_inode(struct inode *inode) | |||
253 | { | 353 | { |
254 | struct mqueue_inode_info *info; | 354 | struct mqueue_inode_info *info; |
255 | struct user_struct *user; | 355 | struct user_struct *user; |
256 | unsigned long mq_bytes; | 356 | unsigned long mq_bytes, mq_treesize; |
257 | int i; | ||
258 | struct ipc_namespace *ipc_ns; | 357 | struct ipc_namespace *ipc_ns; |
358 | struct msg_msg *msg; | ||
259 | 359 | ||
260 | clear_inode(inode); | 360 | clear_inode(inode); |
261 | 361 | ||
@@ -265,17 +365,18 @@ static void mqueue_evict_inode(struct inode *inode) | |||
265 | ipc_ns = get_ns_from_inode(inode); | 365 | ipc_ns = get_ns_from_inode(inode); |
266 | info = MQUEUE_I(inode); | 366 | info = MQUEUE_I(inode); |
267 | spin_lock(&info->lock); | 367 | spin_lock(&info->lock); |
268 | for (i = 0; i < info->attr.mq_curmsgs; i++) | 368 | while ((msg = msg_get(info)) != NULL) |
269 | free_msg(info->messages[i]); | 369 | free_msg(msg); |
270 | if (is_vmalloc_addr(info->messages)) | ||
271 | vfree(info->messages); | ||
272 | else | ||
273 | kfree(info->messages); | ||
274 | spin_unlock(&info->lock); | 370 | spin_unlock(&info->lock); |
275 | 371 | ||
276 | /* Total amount of bytes accounted for the mqueue */ | 372 | /* Total amount of bytes accounted for the mqueue */ |
277 | mq_bytes = info->attr.mq_maxmsg * (sizeof(struct msg_msg *) | 373 | mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) + |
278 | + info->attr.mq_msgsize); | 374 | min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) * |
375 | sizeof(struct posix_msg_tree_node); | ||
376 | |||
377 | mq_bytes = mq_treesize + (info->attr.mq_maxmsg * | ||
378 | info->attr.mq_msgsize); | ||
379 | |||
279 | user = info->user; | 380 | user = info->user; |
280 | if (user) { | 381 | if (user) { |
281 | spin_lock(&mq_lock); | 382 | spin_lock(&mq_lock); |
@@ -495,26 +596,6 @@ static struct ext_wait_queue *wq_get_first_waiter( | |||
495 | return list_entry(ptr, struct ext_wait_queue, list); | 596 | return list_entry(ptr, struct ext_wait_queue, list); |
496 | } | 597 | } |
497 | 598 | ||
498 | /* Auxiliary functions to manipulate messages' list */ | ||
499 | static void msg_insert(struct msg_msg *ptr, struct mqueue_inode_info *info) | ||
500 | { | ||
501 | int k; | ||
502 | |||
503 | k = info->attr.mq_curmsgs - 1; | ||
504 | while (k >= 0 && info->messages[k]->m_type >= ptr->m_type) { | ||
505 | info->messages[k + 1] = info->messages[k]; | ||
506 | k--; | ||
507 | } | ||
508 | info->attr.mq_curmsgs++; | ||
509 | info->qsize += ptr->m_ts; | ||
510 | info->messages[k + 1] = ptr; | ||
511 | } | ||
512 | |||
513 | static inline struct msg_msg *msg_get(struct mqueue_inode_info *info) | ||
514 | { | ||
515 | info->qsize -= info->messages[--info->attr.mq_curmsgs]->m_ts; | ||
516 | return info->messages[info->attr.mq_curmsgs]; | ||
517 | } | ||
518 | 599 | ||
519 | static inline void set_cookie(struct sk_buff *skb, char code) | 600 | static inline void set_cookie(struct sk_buff *skb, char code) |
520 | { | 601 | { |
@@ -848,7 +929,8 @@ static inline void pipelined_receive(struct mqueue_inode_info *info) | |||
848 | wake_up_interruptible(&info->wait_q); | 929 | wake_up_interruptible(&info->wait_q); |
849 | return; | 930 | return; |
850 | } | 931 | } |
851 | msg_insert(sender->msg, info); | 932 | if (msg_insert(sender->msg, info)) |
933 | return; | ||
852 | list_del(&sender->list); | 934 | list_del(&sender->list); |
853 | sender->state = STATE_PENDING; | 935 | sender->state = STATE_PENDING; |
854 | wake_up_process(sender->task); | 936 | wake_up_process(sender->task); |
@@ -936,7 +1018,12 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, | |||
936 | pipelined_send(info, msg_ptr, receiver); | 1018 | pipelined_send(info, msg_ptr, receiver); |
937 | } else { | 1019 | } else { |
938 | /* adds message to the queue */ | 1020 | /* adds message to the queue */ |
939 | msg_insert(msg_ptr, info); | 1021 | if (msg_insert(msg_ptr, info)) { |
1022 | free_msg(msg_ptr); | ||
1023 | ret = -ENOMEM; | ||
1024 | spin_unlock(&info->lock); | ||
1025 | goto out_fput; | ||
1026 | } | ||
940 | __do_notify(info); | 1027 | __do_notify(info); |
941 | } | 1028 | } |
942 | inode->i_atime = inode->i_mtime = inode->i_ctime = | 1029 | inode->i_atime = inode->i_mtime = inode->i_ctime = |