aboutsummaryrefslogtreecommitdiffstats
path: root/ipc
diff options
context:
space:
mode:
author: Marcus Gelderie <redmnic@gmail.com> 2015-08-06 18:46:10 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-08-06 21:39:39 -0400
commit: de54b9ac253787c366bbfb28d901a31954eb3511 (patch)
tree: d1892200d83e3813dab248a9ab421497d1f6e15b /ipc
parent: 4469942bbbe5ebf845e04971d8c74e9b6178f9fa (diff)
ipc: modify message queue accounting to not take kernel data structures into account
A while back, the message queue implementation in the kernel was improved to use btrees to speed up retrieval of messages, in commit d6629859b36d ("ipc/mqueue: improve performance of send/recv"). That patch introducing the improved kernel handling of message queues (using btrees) has, as a by-product, changed the meaning of the QSIZE field in the pseudo-file created for the queue. Before, this field reflected the size of the user data in the queue. Since then, it also takes kernel data structures into account. For example, if 13 bytes of user data are in the queue, on my machine the file reports a size of 61 bytes. There was some discussion on this topic before (for example https://lkml.org/lkml/2014/10/1/115). Commenting on a thread on lkml, Michael Kerrisk gave the following background (https://lkml.org/lkml/2015/6/16/74): The pseudofiles in the mqueue filesystem (usually mounted at /dev/mqueue) expose fields with metadata describing a message queue. One of these fields, QSIZE, as originally implemented, showed the total number of bytes of user data in all messages in the message queue, and this feature was documented from the beginning in the mq_overview(7) page. In 3.5, some other (useful) work happened to break the user-space API in a couple of places, including the value exposed via QSIZE, which now includes a measure of kernel overhead bytes for the queue, a figure that renders QSIZE useless for its original purpose, since there's no way to deduce the number of overhead bytes consumed by the implementation. (The other user-space breakage was subsequently fixed.) This patch removes the accounting of kernel data structures in the queue. Reporting the size of these data structures in the QSIZE field was a breaking change (see Michael's comment above). Without the QSIZE field reporting the total size of user data in the queue, there is no way to deduce this number.
It should be noted that the resource limit RLIMIT_MSGQUEUE is counted against the worst-case size of the queue (in both the old and the new implementation). Therefore, the kernel overhead accounting in QSIZE is not necessary to help the user understand the limitations RLIMIT imposes on the processes. Signed-off-by: Marcus Gelderie <redmnic@gmail.com> Acked-by: Doug Ledford <dledford@redhat.com> Acked-by: Michael Kerrisk <mtk.manpages@gmail.com> Acked-by: Davidlohr Bueso <dbueso@suse.de> Cc: David Howells <dhowells@redhat.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: John Duffy <jb_duffy@btinternet.com> Cc: Arto Bendiken <arto@bendiken.net> Cc: Manfred Spraul <manfred@colorfullife.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc')
-rw-r--r-- ipc/mqueue.c 5
1 files changed, 0 insertions, 5 deletions
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index a24ba9fe5bb8..161a1807e6ef 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -142,7 +142,6 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
142 if (!leaf) 142 if (!leaf)
143 return -ENOMEM; 143 return -ENOMEM;
144 INIT_LIST_HEAD(&leaf->msg_list); 144 INIT_LIST_HEAD(&leaf->msg_list);
145 info->qsize += sizeof(*leaf);
146 } 145 }
147 leaf->priority = msg->m_type; 146 leaf->priority = msg->m_type;
148 rb_link_node(&leaf->rb_node, parent, p); 147 rb_link_node(&leaf->rb_node, parent, p);
@@ -187,7 +186,6 @@ try_again:
187 "lazy leaf delete!\n"); 186 "lazy leaf delete!\n");
188 rb_erase(&leaf->rb_node, &info->msg_tree); 187 rb_erase(&leaf->rb_node, &info->msg_tree);
189 if (info->node_cache) { 188 if (info->node_cache) {
190 info->qsize -= sizeof(*leaf);
191 kfree(leaf); 189 kfree(leaf);
192 } else { 190 } else {
193 info->node_cache = leaf; 191 info->node_cache = leaf;
@@ -200,7 +198,6 @@ try_again:
200 if (list_empty(&leaf->msg_list)) { 198 if (list_empty(&leaf->msg_list)) {
201 rb_erase(&leaf->rb_node, &info->msg_tree); 199 rb_erase(&leaf->rb_node, &info->msg_tree);
202 if (info->node_cache) { 200 if (info->node_cache) {
203 info->qsize -= sizeof(*leaf);
204 kfree(leaf); 201 kfree(leaf);
205 } else { 202 } else {
206 info->node_cache = leaf; 203 info->node_cache = leaf;
@@ -1034,7 +1031,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
1034 /* Save our speculative allocation into the cache */ 1031 /* Save our speculative allocation into the cache */
1035 INIT_LIST_HEAD(&new_leaf->msg_list); 1032 INIT_LIST_HEAD(&new_leaf->msg_list);
1036 info->node_cache = new_leaf; 1033 info->node_cache = new_leaf;
1037 info->qsize += sizeof(*new_leaf);
1038 new_leaf = NULL; 1034 new_leaf = NULL;
1039 } else { 1035 } else {
1040 kfree(new_leaf); 1036 kfree(new_leaf);
@@ -1142,7 +1138,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
1142 /* Save our speculative allocation into the cache */ 1138 /* Save our speculative allocation into the cache */
1143 INIT_LIST_HEAD(&new_leaf->msg_list); 1139 INIT_LIST_HEAD(&new_leaf->msg_list);
1144 info->node_cache = new_leaf; 1140 info->node_cache = new_leaf;
1145 info->qsize += sizeof(*new_leaf);
1146 } else { 1141 } else {
1147 kfree(new_leaf); 1142 kfree(new_leaf);
1148 } 1143 }