diff options
author | Nadia Derbey <Nadia.Derbey@bull.net> | 2008-07-25 04:48:08 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-25 13:53:42 -0400 |
commit | 9eefe520c814f6f62c5d36a2ddcd3fb99dfdb30e (patch) | |
tree | 064ce99674f144b681f8d365d1e20d99c8078d0c /ipc/ipc_sysctl.c | |
parent | f1a43f93f0f3bab418800eaccb9e2e3b5427e173 (diff) |
ipc: do not use a negative value to re-enable msgmni automatic recomputing
This patch proposes an alternative to the "magical
positive-versus-negative number trick" Andrew complained about last week
in http://lkml.org/lkml/2008/6/24/418.
This had been introduced with the patches that scale msgmni to the amount
of lowmem. With these patches, msgmni has a registered notification
routine that recomputes msgmni value upon memory add/remove or ipc
namespace creation/removal.
When msgmni is changed from user space (i.e. value written to the proc
file), that notification routine is unregistered, and the way to make it
registered back is to write a negative value into the proc file. This is
the "magical positive-versus-negative number trick".
To fix this, a new proc file is introduced: /proc/sys/kernel/auto_msgmni.
This file acts as ON/OFF for msgmni automatic recomputing.
With this patch, the process is the following:
1) kernel boots in "automatic recomputing mode"
/proc/sys/kernel/msgmni contains the value that has been computed (depends
on lowmem)
/proc/sys/kernel/auto_msgmni contains "1"
2) echo <val> > /proc/sys/kernel/msgmni
. sets msg_ctlmni to <val>
. de-activates automatic recomputing (i.e. if, say, some memory is added
msgmni won't be recomputed anymore)
. /proc/sys/kernel/auto_msgmni now contains "0"
3) echo "0" > /proc/sys/kernel/auto_msgmni
. de-activates msgmni automatic recomputing
(this has the same effect as step 2, except that msg_ctlmni's value stays
blocked at its current value)
4) echo "1" > /proc/sys/kernel/auto_msgmni
. recomputes msgmni's value based on the current available memory size
and number of ipc namespaces
. re-activates automatic recomputing for msgmni.
Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Solofo Ramangalahy <Solofo.Ramangalahy@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc/ipc_sysctl.c')
-rw-r--r-- | ipc/ipc_sysctl.c | 72 |
1 files changed, 59 insertions, 13 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index d3497465cc0a..69bc85978ba0 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c | |||
@@ -27,15 +27,17 @@ static void *get_ipc(ctl_table *table) | |||
27 | } | 27 | } |
28 | 28 | ||
29 | /* | 29 | /* |
30 | * Routine that is called when a tunable has successfully been changed by | 30 | * Routine that is called when the file "auto_msgmni" has successfully been |
31 | * hand and it has a callback routine registered on the ipc namespace notifier | 31 | * written. |
32 | * chain: we don't want such tunables to be recomputed anymore upon memory | 32 | * Two values are allowed: |
33 | * add/remove or ipc namespace creation/removal. | 33 | * 0: unregister msgmni's callback routine from the ipc namespace notifier |
34 | * They can come back to a recomputable state by being set to a <0 value. | 34 | * chain. This means that msgmni won't be recomputed anymore upon memory |
35 | * add/remove or ipc namespace creation/removal. | ||
36 | * 1: register back the callback routine. | ||
35 | */ | 37 | */ |
36 | static void tunable_set_callback(int val) | 38 | static void ipc_auto_callback(int val) |
37 | { | 39 | { |
38 | if (val >= 0) | 40 | if (!val) |
39 | unregister_ipcns_notifier(current->nsproxy->ipc_ns); | 41 | unregister_ipcns_notifier(current->nsproxy->ipc_ns); |
40 | else { | 42 | else { |
41 | /* | 43 | /* |
@@ -71,7 +73,12 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, | |||
71 | rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); | 73 | rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); |
72 | 74 | ||
73 | if (write && !rc && lenp_bef == *lenp) | 75 | if (write && !rc && lenp_bef == *lenp) |
74 | tunable_set_callback(*((int *)(ipc_table.data))); | 76 | /* |
77 | * Tunable has successfully been changed by hand. Disable its | ||
78 | * automatic adjustment. This simply requires unregistering | ||
79 | * the notifiers that trigger recalculation. | ||
80 | */ | ||
81 | unregister_ipcns_notifier(current->nsproxy->ipc_ns); | ||
75 | 82 | ||
76 | return rc; | 83 | return rc; |
77 | } | 84 | } |
@@ -87,10 +94,39 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, | |||
87 | lenp, ppos); | 94 | lenp, ppos); |
88 | } | 95 | } |
89 | 96 | ||
97 | static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, | ||
98 | struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) | ||
99 | { | ||
100 | struct ctl_table ipc_table; | ||
101 | size_t lenp_bef = *lenp; | ||
102 | int oldval; | ||
103 | int rc; | ||
104 | |||
105 | memcpy(&ipc_table, table, sizeof(ipc_table)); | ||
106 | ipc_table.data = get_ipc(table); | ||
107 | oldval = *((int *)(ipc_table.data)); | ||
108 | |||
109 | rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); | ||
110 | |||
111 | if (write && !rc && lenp_bef == *lenp) { | ||
112 | int newval = *((int *)(ipc_table.data)); | ||
113 | /* | ||
114 | * The file "auto_msgmni" has correctly been set. | ||
115 | * React by (un)registering the corresponding tunable, if the | ||
116 | * value has changed. | ||
117 | */ | ||
118 | if (newval != oldval) | ||
119 | ipc_auto_callback(newval); | ||
120 | } | ||
121 | |||
122 | return rc; | ||
123 | } | ||
124 | |||
90 | #else | 125 | #else |
91 | #define proc_ipc_doulongvec_minmax NULL | 126 | #define proc_ipc_doulongvec_minmax NULL |
92 | #define proc_ipc_dointvec NULL | 127 | #define proc_ipc_dointvec NULL |
93 | #define proc_ipc_callback_dointvec NULL | 128 | #define proc_ipc_callback_dointvec NULL |
129 | #define proc_ipcauto_dointvec_minmax NULL | ||
94 | #endif | 130 | #endif |
95 | 131 | ||
96 | #ifdef CONFIG_SYSCTL_SYSCALL | 132 | #ifdef CONFIG_SYSCTL_SYSCALL |
@@ -142,14 +178,11 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, | |||
142 | rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, | 178 | rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, |
143 | newlen); | 179 | newlen); |
144 | 180 | ||
145 | if (newval && newlen && rc > 0) { | 181 | if (newval && newlen && rc > 0) |
146 | /* | 182 | /* |
147 | * Tunable has successfully been changed from userland | 183 | * Tunable has successfully been changed from userland |
148 | */ | 184 | */ |
149 | int *data = get_ipc(table); | 185 | unregister_ipcns_notifier(current->nsproxy->ipc_ns); |
150 | |||
151 | tunable_set_callback(*data); | ||
152 | } | ||
153 | 186 | ||
154 | return rc; | 187 | return rc; |
155 | } | 188 | } |
@@ -158,6 +191,9 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, | |||
158 | #define sysctl_ipc_registered_data NULL | 191 | #define sysctl_ipc_registered_data NULL |
159 | #endif | 192 | #endif |
160 | 193 | ||
194 | static int zero; | ||
195 | static int one = 1; | ||
196 | |||
161 | static struct ctl_table ipc_kern_table[] = { | 197 | static struct ctl_table ipc_kern_table[] = { |
162 | { | 198 | { |
163 | .ctl_name = KERN_SHMMAX, | 199 | .ctl_name = KERN_SHMMAX, |
@@ -222,6 +258,16 @@ static struct ctl_table ipc_kern_table[] = { | |||
222 | .proc_handler = proc_ipc_dointvec, | 258 | .proc_handler = proc_ipc_dointvec, |
223 | .strategy = sysctl_ipc_data, | 259 | .strategy = sysctl_ipc_data, |
224 | }, | 260 | }, |
261 | { | ||
262 | .ctl_name = CTL_UNNUMBERED, | ||
263 | .procname = "auto_msgmni", | ||
264 | .data = &init_ipc_ns.auto_msgmni, | ||
265 | .maxlen = sizeof(int), | ||
266 | .mode = 0644, | ||
267 | .proc_handler = proc_ipcauto_dointvec_minmax, | ||
268 | .extra1 = &zero, | ||
269 | .extra2 = &one, | ||
270 | }, | ||
225 | {} | 271 | {} |
226 | }; | 272 | }; |
227 | 273 | ||