aboutsummaryrefslogtreecommitdiffstats
path: root/ipc
diff options
context:
space:
mode:
author: Manfred Spraul <manfred@colorfullife.com> 2014-12-12 19:58:17 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-12-13 15:42:52 -0500
commit: 0050ee059f7fc86b1df2527aaa14ed5dc72f9973 (patch)
tree: 9fa11fd54259ed433d89b0a483e172b8e75d8f23 /ipc
parent: e843e7d2c88b7db107a86bd2c7145dc715c058f4 (diff)
ipc/msg: increase MSGMNI, remove scaling
SysV can be abused to allocate locked kernel memory. For most systems, a small limit doesn't make sense, see the discussion with regards to SHMMAX.

Therefore: increase MSGMNI to the maximum supported.

And: If we ignore the risk of locking too much memory, then an automatic scaling of MSGMNI doesn't make sense. Therefore the logic can be removed.

The code preserves auto_msgmni to avoid breaking any user space applications that expect that the value exists.

Notes:
1) If an administrator must limit the memory allocations, then he can set MSGMNI as necessary. Or he can disable sysv entirely (as e.g. done by Android).
2) MSGMAX and MSGMNB are intentionally not increased, as these values are used to control latency vs. throughput: If MSGMNB is large, then msgsnd() just returns and more messages can be queued before a task switch to a task that calls msgrcv() is forced.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Rafael Aquini <aquini@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc')
-rw-r--r-- ipc/Makefile 2
-rw-r--r-- ipc/ipc_sysctl.c 93
-rw-r--r-- ipc/ipcns_notifier.c 92
-rw-r--r-- ipc/msg.c 36
-rw-r--r-- ipc/namespace.c 22
-rw-r--r-- ipc/util.c 40
6 files changed, 19 insertions, 266 deletions
diff --git a/ipc/Makefile b/ipc/Makefile
index 9075e172e52c..86c7300ecdf5 100644
--- a/ipc/Makefile
+++ b/ipc/Makefile
@@ -3,7 +3,7 @@
3# 3#
4 4
5obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o 5obj-$(CONFIG_SYSVIPC_COMPAT) += compat.o
6obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o ipcns_notifier.o syscall.o 6obj-$(CONFIG_SYSVIPC) += util.o msgutil.o msg.o sem.o shm.o syscall.o
7obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o 7obj-$(CONFIG_SYSVIPC_SYSCTL) += ipc_sysctl.o
8obj_mq-$(CONFIG_COMPAT) += compat_mq.o 8obj_mq-$(CONFIG_COMPAT) += compat_mq.o
9obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y) 9obj-$(CONFIG_POSIX_MQUEUE) += mqueue.o msgutil.o $(obj_mq-y)
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index e8075b247497..8ad93c29f511 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -62,29 +62,6 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
62 return err; 62 return err;
63} 63}
64 64
65static int proc_ipc_callback_dointvec_minmax(struct ctl_table *table, int write,
66 void __user *buffer, size_t *lenp, loff_t *ppos)
67{
68 struct ctl_table ipc_table;
69 size_t lenp_bef = *lenp;
70 int rc;
71
72 memcpy(&ipc_table, table, sizeof(ipc_table));
73 ipc_table.data = get_ipc(table);
74
75 rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
76
77 if (write && !rc && lenp_bef == *lenp)
78 /*
79 * Tunable has successfully been changed by hand. Disable its
80 * automatic adjustment. This simply requires unregistering
81 * the notifiers that trigger recalculation.
82 */
83 unregister_ipcns_notifier(current->nsproxy->ipc_ns);
84
85 return rc;
86}
87
88static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write, 65static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
89 void __user *buffer, size_t *lenp, loff_t *ppos) 66 void __user *buffer, size_t *lenp, loff_t *ppos)
90{ 67{
@@ -96,54 +73,19 @@ static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
96 lenp, ppos); 73 lenp, ppos);
97} 74}
98 75
99/* 76static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
100 * Routine that is called when the file "auto_msgmni" has successfully been
101 * written.
102 * Two values are allowed:
103 * 0: unregister msgmni's callback routine from the ipc namespace notifier
104 * chain. This means that msgmni won't be recomputed anymore upon memory
105 * add/remove or ipc namespace creation/removal.
106 * 1: register back the callback routine.
107 */
108static void ipc_auto_callback(int val)
109{
110 if (!val)
111 unregister_ipcns_notifier(current->nsproxy->ipc_ns);
112 else {
113 /*
114 * Re-enable automatic recomputing only if not already
115 * enabled.
116 */
117 recompute_msgmni(current->nsproxy->ipc_ns);
118 cond_register_ipcns_notifier(current->nsproxy->ipc_ns);
119 }
120}
121
122static int proc_ipcauto_dointvec_minmax(struct ctl_table *table, int write,
123 void __user *buffer, size_t *lenp, loff_t *ppos) 77 void __user *buffer, size_t *lenp, loff_t *ppos)
124{ 78{
125 struct ctl_table ipc_table; 79 struct ctl_table ipc_table;
126 int oldval; 80 int dummy = 0;
127 int rc;
128 81
129 memcpy(&ipc_table, table, sizeof(ipc_table)); 82 memcpy(&ipc_table, table, sizeof(ipc_table));
130 ipc_table.data = get_ipc(table); 83 ipc_table.data = &dummy;
131 oldval = *((int *)(ipc_table.data));
132 84
133 rc = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); 85 if (write)
86 pr_info_once("writing to auto_msgmni has no effect");
134 87
135 if (write && !rc) { 88 return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos);
136 int newval = *((int *)(ipc_table.data));
137 /*
138 * The file "auto_msgmni" has correctly been set.
139 * React by (un)registering the corresponding tunable, if the
140 * value has changed.
141 */
142 if (newval != oldval)
143 ipc_auto_callback(newval);
144 }
145
146 return rc;
147} 89}
148 90
149#else 91#else
@@ -151,8 +93,7 @@ static int proc_ipcauto_dointvec_minmax(struct ctl_table *table, int write,
151#define proc_ipc_dointvec NULL 93#define proc_ipc_dointvec NULL
152#define proc_ipc_dointvec_minmax NULL 94#define proc_ipc_dointvec_minmax NULL
153#define proc_ipc_dointvec_minmax_orphans NULL 95#define proc_ipc_dointvec_minmax_orphans NULL
154#define proc_ipc_callback_dointvec_minmax NULL 96#define proc_ipc_auto_msgmni NULL
155#define proc_ipcauto_dointvec_minmax NULL
156#endif 97#endif
157 98
158static int zero; 99static int zero;
@@ -204,11 +145,20 @@ static struct ctl_table ipc_kern_table[] = {
204 .data = &init_ipc_ns.msg_ctlmni, 145 .data = &init_ipc_ns.msg_ctlmni,
205 .maxlen = sizeof(init_ipc_ns.msg_ctlmni), 146 .maxlen = sizeof(init_ipc_ns.msg_ctlmni),
206 .mode = 0644, 147 .mode = 0644,
207 .proc_handler = proc_ipc_callback_dointvec_minmax, 148 .proc_handler = proc_ipc_dointvec_minmax,
208 .extra1 = &zero, 149 .extra1 = &zero,
209 .extra2 = &int_max, 150 .extra2 = &int_max,
210 }, 151 },
211 { 152 {
153 .procname = "auto_msgmni",
154 .data = NULL,
155 .maxlen = sizeof(int),
156 .mode = 0644,
157 .proc_handler = proc_ipc_auto_msgmni,
158 .extra1 = &zero,
159 .extra2 = &one,
160 },
161 {
212 .procname = "msgmnb", 162 .procname = "msgmnb",
213 .data = &init_ipc_ns.msg_ctlmnb, 163 .data = &init_ipc_ns.msg_ctlmnb,
214 .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), 164 .maxlen = sizeof(init_ipc_ns.msg_ctlmnb),
@@ -224,15 +174,6 @@ static struct ctl_table ipc_kern_table[] = {
224 .mode = 0644, 174 .mode = 0644,
225 .proc_handler = proc_ipc_dointvec, 175 .proc_handler = proc_ipc_dointvec,
226 }, 176 },
227 {
228 .procname = "auto_msgmni",
229 .data = &init_ipc_ns.auto_msgmni,
230 .maxlen = sizeof(int),
231 .mode = 0644,
232 .proc_handler = proc_ipcauto_dointvec_minmax,
233 .extra1 = &zero,
234 .extra2 = &one,
235 },
236#ifdef CONFIG_CHECKPOINT_RESTORE 177#ifdef CONFIG_CHECKPOINT_RESTORE
237 { 178 {
238 .procname = "sem_next_id", 179 .procname = "sem_next_id",
diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c
deleted file mode 100644
index b9b31a4f77e1..000000000000
--- a/ipc/ipcns_notifier.c
+++ /dev/null
@@ -1,92 +0,0 @@
1/*
2 * linux/ipc/ipcns_notifier.c
3 * Copyright (C) 2007 BULL SA. Nadia Derbey
4 *
5 * Notification mechanism for ipc namespaces:
6 * The callback routine registered in the memory chain invokes the ipcns
7 * notifier chain with the IPCNS_MEMCHANGED event.
8 * Each callback routine registered in the ipcns namespace recomputes msgmni
9 * for the owning namespace.
10 */
11
12#include <linux/msg.h>
13#include <linux/rcupdate.h>
14#include <linux/notifier.h>
15#include <linux/nsproxy.h>
16#include <linux/ipc_namespace.h>
17
18#include "util.h"
19
20
21
22static BLOCKING_NOTIFIER_HEAD(ipcns_chain);
23
24
25static int ipcns_callback(struct notifier_block *self,
26 unsigned long action, void *arg)
27{
28 struct ipc_namespace *ns;
29
30 switch (action) {
31 case IPCNS_MEMCHANGED: /* amount of lowmem has changed */
32 case IPCNS_CREATED:
33 case IPCNS_REMOVED:
34 /*
35 * It's time to recompute msgmni
36 */
37 ns = container_of(self, struct ipc_namespace, ipcns_nb);
38 /*
39 * No need to get a reference on the ns: the 1st job of
40 * free_ipc_ns() is to unregister the callback routine.
41 * blocking_notifier_chain_unregister takes the wr lock to do
42 * it.
43 * When this callback routine is called the rd lock is held by
44 * blocking_notifier_call_chain.
45 * So the ipc ns cannot be freed while we are here.
46 */
47 recompute_msgmni(ns);
48 break;
49 default:
50 break;
51 }
52
53 return NOTIFY_OK;
54}
55
56int register_ipcns_notifier(struct ipc_namespace *ns)
57{
58 int rc;
59
60 memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
61 ns->ipcns_nb.notifier_call = ipcns_callback;
62 ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
63 rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb);
64 if (!rc)
65 ns->auto_msgmni = 1;
66 return rc;
67}
68
69int cond_register_ipcns_notifier(struct ipc_namespace *ns)
70{
71 int rc;
72
73 memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb));
74 ns->ipcns_nb.notifier_call = ipcns_callback;
75 ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI;
76 rc = blocking_notifier_chain_cond_register(&ipcns_chain,
77 &ns->ipcns_nb);
78 if (!rc)
79 ns->auto_msgmni = 1;
80 return rc;
81}
82
83void unregister_ipcns_notifier(struct ipc_namespace *ns)
84{
85 blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb);
86 ns->auto_msgmni = 0;
87}
88
89int ipcns_notify(unsigned long val)
90{
91 return blocking_notifier_call_chain(&ipcns_chain, val, NULL);
92}
diff --git a/ipc/msg.c b/ipc/msg.c
index c5d8e3749985..a7261d5cbc89 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -989,43 +989,12 @@ SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
989 return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill); 989 return do_msgrcv(msqid, msgp, msgsz, msgtyp, msgflg, do_msg_fill);
990} 990}
991 991
992/*
993 * Scale msgmni with the available lowmem size: the memory dedicated to msg
994 * queues should occupy at most 1/MSG_MEM_SCALE of lowmem.
995 * Also take into account the number of nsproxies created so far.
996 * This should be done staying within the (MSGMNI , IPCMNI/nr_ipc_ns) range.
997 */
998void recompute_msgmni(struct ipc_namespace *ns)
999{
1000 struct sysinfo i;
1001 unsigned long allowed;
1002 int nb_ns;
1003
1004 si_meminfo(&i);
1005 allowed = (((i.totalram - i.totalhigh) / MSG_MEM_SCALE) * i.mem_unit)
1006 / MSGMNB;
1007 nb_ns = atomic_read(&nr_ipc_ns);
1008 allowed /= nb_ns;
1009
1010 if (allowed < MSGMNI) {
1011 ns->msg_ctlmni = MSGMNI;
1012 return;
1013 }
1014
1015 if (allowed > IPCMNI / nb_ns) {
1016 ns->msg_ctlmni = IPCMNI / nb_ns;
1017 return;
1018 }
1019
1020 ns->msg_ctlmni = allowed;
1021}
1022 992
1023void msg_init_ns(struct ipc_namespace *ns) 993void msg_init_ns(struct ipc_namespace *ns)
1024{ 994{
1025 ns->msg_ctlmax = MSGMAX; 995 ns->msg_ctlmax = MSGMAX;
1026 ns->msg_ctlmnb = MSGMNB; 996 ns->msg_ctlmnb = MSGMNB;
1027 997 ns->msg_ctlmni = MSGMNI;
1028 recompute_msgmni(ns);
1029 998
1030 atomic_set(&ns->msg_bytes, 0); 999 atomic_set(&ns->msg_bytes, 0);
1031 atomic_set(&ns->msg_hdrs, 0); 1000 atomic_set(&ns->msg_hdrs, 0);
@@ -1069,9 +1038,6 @@ void __init msg_init(void)
1069{ 1038{
1070 msg_init_ns(&init_ipc_ns); 1039 msg_init_ns(&init_ipc_ns);
1071 1040
1072 printk(KERN_INFO "msgmni has been set to %d\n",
1073 init_ipc_ns.msg_ctlmni);
1074
1075 ipc_init_proc_interface("sysvipc/msg", 1041 ipc_init_proc_interface("sysvipc/msg",
1076 " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", 1042 " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n",
1077 IPC_MSG_IDS, sysvipc_msg_proc_show); 1043 IPC_MSG_IDS, sysvipc_msg_proc_show);
diff --git a/ipc/namespace.c b/ipc/namespace.c
index b54468e48e32..1a3ffd40356e 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -45,14 +45,6 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
45 msg_init_ns(ns); 45 msg_init_ns(ns);
46 shm_init_ns(ns); 46 shm_init_ns(ns);
47 47
48 /*
49 * msgmni has already been computed for the new ipc ns.
50 * Thus, do the ipcns creation notification before registering that
51 * new ipcns in the chain.
52 */
53 ipcns_notify(IPCNS_CREATED);
54 register_ipcns_notifier(ns);
55
56 ns->user_ns = get_user_ns(user_ns); 48 ns->user_ns = get_user_ns(user_ns);
57 49
58 return ns; 50 return ns;
@@ -99,25 +91,11 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
99 91
100static void free_ipc_ns(struct ipc_namespace *ns) 92static void free_ipc_ns(struct ipc_namespace *ns)
101{ 93{
102 /*
103 * Unregistering the hotplug notifier at the beginning guarantees
104 * that the ipc namespace won't be freed while we are inside the
105 * callback routine. Since the blocking_notifier_chain_XXX routines
106 * hold a rw lock on the notifier list, unregister_ipcns_notifier()
107 * won't take the rw lock before blocking_notifier_call_chain() has
108 * released the rd lock.
109 */
110 unregister_ipcns_notifier(ns);
111 sem_exit_ns(ns); 94 sem_exit_ns(ns);
112 msg_exit_ns(ns); 95 msg_exit_ns(ns);
113 shm_exit_ns(ns); 96 shm_exit_ns(ns);
114 atomic_dec(&nr_ipc_ns); 97 atomic_dec(&nr_ipc_ns);
115 98
116 /*
117 * Do the ipcns removal notification after decrementing nr_ipc_ns in
118 * order to have a correct value when recomputing msgmni.
119 */
120 ipcns_notify(IPCNS_REMOVED);
121 put_user_ns(ns->user_ns); 99 put_user_ns(ns->user_ns);
122 proc_free_inum(ns->proc_inum); 100 proc_free_inum(ns->proc_inum);
123 kfree(ns); 101 kfree(ns);
diff --git a/ipc/util.c b/ipc/util.c
index 88adc329888c..106bed0378ab 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -71,44 +71,6 @@ struct ipc_proc_iface {
71 int (*show)(struct seq_file *, void *); 71 int (*show)(struct seq_file *, void *);
72}; 72};
73 73
74static void ipc_memory_notifier(struct work_struct *work)
75{
76 ipcns_notify(IPCNS_MEMCHANGED);
77}
78
79static int ipc_memory_callback(struct notifier_block *self,
80 unsigned long action, void *arg)
81{
82 static DECLARE_WORK(ipc_memory_wq, ipc_memory_notifier);
83
84 switch (action) {
85 case MEM_ONLINE: /* memory successfully brought online */
86 case MEM_OFFLINE: /* or offline: it's time to recompute msgmni */
87 /*
88 * This is done by invoking the ipcns notifier chain with the
89 * IPC_MEMCHANGED event.
90 * In order not to keep the lock on the hotplug memory chain
91 * for too long, queue a work item that will, when waken up,
92 * activate the ipcns notification chain.
93 */
94 schedule_work(&ipc_memory_wq);
95 break;
96 case MEM_GOING_ONLINE:
97 case MEM_GOING_OFFLINE:
98 case MEM_CANCEL_ONLINE:
99 case MEM_CANCEL_OFFLINE:
100 default:
101 break;
102 }
103
104 return NOTIFY_OK;
105}
106
107static struct notifier_block ipc_memory_nb = {
108 .notifier_call = ipc_memory_callback,
109 .priority = IPC_CALLBACK_PRI,
110};
111
112/** 74/**
113 * ipc_init - initialise ipc subsystem 75 * ipc_init - initialise ipc subsystem
114 * 76 *
@@ -124,8 +86,6 @@ static int __init ipc_init(void)
124 sem_init(); 86 sem_init();
125 msg_init(); 87 msg_init();
126 shm_init(); 88 shm_init();
127 register_hotmemory_notifier(&ipc_memory_nb);
128 register_ipcns_notifier(&init_ipc_ns);
129 return 0; 89 return 0;
130} 90}
131device_initcall(ipc_init); 91device_initcall(ipc_init);