diff options
| author | Nadia Derbey <Nadia.Derbey@bull.net> | 2008-07-25 04:48:08 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-25 13:53:42 -0400 |
| commit | 9eefe520c814f6f62c5d36a2ddcd3fb99dfdb30e (patch) | |
| tree | 064ce99674f144b681f8d365d1e20d99c8078d0c | |
| parent | f1a43f93f0f3bab418800eaccb9e2e3b5427e173 (diff) | |
ipc: do not use a negative value to re-enable msgmni automatic recomputing
This patch proposes an alternative to the "magical
positive-versus-negative number trick" Andrew complained about last week
in http://lkml.org/lkml/2008/6/24/418.
This had been introduced with the patches that scale msgmni to the amount
of lowmem. With these patches, msgmni has a registered notification
routine that recomputes msgmni value upon memory add/remove or ipc
namespace creation/removal.
When msgmni is changed from user space (i.e. value written to the proc
file), that notification routine is unregistered, and the way to
re-register it is to write a negative value into the proc file. This is
the "magical positive-versus-negative number trick".
To fix this, a new proc file is introduced: /proc/sys/kernel/auto_msgmni.
This file acts as ON/OFF for msgmni automatic recomputing.
With this patch, the process is the following:
1) kernel boots in "automatic recomputing mode"
/proc/sys/kernel/msgmni contains the value that has been computed (depends
on lowmem)
/proc/sys/kernel/auto_msgmni contains "1"
2) echo <val> > /proc/sys/kernel/msgmni
. sets msg_ctlmni to <val>
. de-activates automatic recomputing (i.e. if, say, some memory is added
msgmni won't be recomputed anymore)
. /proc/sys/kernel/auto_msgmni now contains "0"
3) echo "0" > /proc/sys/kernel/auto_msgmni
. de-activates msgmni automatic recomputing
(this has the same effect as 2), except that msg_ctlmni's value stays
blocked at its current value)
4) echo "1" > /proc/sys/kernel/auto_msgmni
. recomputes msgmni's value based on the current available memory size
and number of ipc namespaces
. re-activates automatic recomputing for msgmni.
Signed-off-by: Nadia Derbey <Nadia.Derbey@bull.net>
Cc: Solofo Ramangalahy <Solofo.Ramangalahy@bull.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | include/linux/ipc_namespace.h | 3 | ||||
| -rw-r--r-- | ipc/ipc_sysctl.c | 72 | ||||
| -rw-r--r-- | ipc/ipcns_notifier.c | 20 |
3 files changed, 76 insertions, 19 deletions
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index ea6c18a8b0d4..ea330f9e7100 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h | |||
| @@ -36,6 +36,7 @@ struct ipc_namespace { | |||
| 36 | int msg_ctlmni; | 36 | int msg_ctlmni; |
| 37 | atomic_t msg_bytes; | 37 | atomic_t msg_bytes; |
| 38 | atomic_t msg_hdrs; | 38 | atomic_t msg_hdrs; |
| 39 | int auto_msgmni; | ||
| 39 | 40 | ||
| 40 | size_t shm_ctlmax; | 41 | size_t shm_ctlmax; |
| 41 | size_t shm_ctlall; | 42 | size_t shm_ctlall; |
| @@ -53,7 +54,7 @@ extern atomic_t nr_ipc_ns; | |||
| 53 | 54 | ||
| 54 | extern int register_ipcns_notifier(struct ipc_namespace *); | 55 | extern int register_ipcns_notifier(struct ipc_namespace *); |
| 55 | extern int cond_register_ipcns_notifier(struct ipc_namespace *); | 56 | extern int cond_register_ipcns_notifier(struct ipc_namespace *); |
| 56 | extern int unregister_ipcns_notifier(struct ipc_namespace *); | 57 | extern void unregister_ipcns_notifier(struct ipc_namespace *); |
| 57 | extern int ipcns_notify(unsigned long); | 58 | extern int ipcns_notify(unsigned long); |
| 58 | 59 | ||
| 59 | #else /* CONFIG_SYSVIPC */ | 60 | #else /* CONFIG_SYSVIPC */ |
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index d3497465cc0a..69bc85978ba0 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c | |||
| @@ -27,15 +27,17 @@ static void *get_ipc(ctl_table *table) | |||
| 27 | } | 27 | } |
| 28 | 28 | ||
| 29 | /* | 29 | /* |
| 30 | * Routine that is called when a tunable has successfully been changed by | 30 | * Routine that is called when the file "auto_msgmni" has successfully been |
| 31 | * hand and it has a callback routine registered on the ipc namespace notifier | 31 | * written. |
| 32 | * chain: we don't want such tunables to be recomputed anymore upon memory | 32 | * Two values are allowed: |
| 33 | * add/remove or ipc namespace creation/removal. | 33 | * 0: unregister msgmni's callback routine from the ipc namespace notifier |
| 34 | * They can come back to a recomputable state by being set to a <0 value. | 34 | * chain. This means that msgmni won't be recomputed anymore upon memory |
| 35 | * add/remove or ipc namespace creation/removal. | ||
| 36 | * 1: register back the callback routine. | ||
| 35 | */ | 37 | */ |
| 36 | static void tunable_set_callback(int val) | 38 | static void ipc_auto_callback(int val) |
| 37 | { | 39 | { |
| 38 | if (val >= 0) | 40 | if (!val) |
| 39 | unregister_ipcns_notifier(current->nsproxy->ipc_ns); | 41 | unregister_ipcns_notifier(current->nsproxy->ipc_ns); |
| 40 | else { | 42 | else { |
| 41 | /* | 43 | /* |
| @@ -71,7 +73,12 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, | |||
| 71 | rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); | 73 | rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); |
| 72 | 74 | ||
| 73 | if (write && !rc && lenp_bef == *lenp) | 75 | if (write && !rc && lenp_bef == *lenp) |
| 74 | tunable_set_callback(*((int *)(ipc_table.data))); | 76 | /* |
| 77 | * Tunable has successfully been changed by hand. Disable its | ||
| 78 | * automatic adjustment. This simply requires unregistering | ||
| 79 | * the notifiers that trigger recalculation. | ||
| 80 | */ | ||
| 81 | unregister_ipcns_notifier(current->nsproxy->ipc_ns); | ||
| 75 | 82 | ||
| 76 | return rc; | 83 | return rc; |
| 77 | } | 84 | } |
| @@ -87,10 +94,39 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, | |||
| 87 | lenp, ppos); | 94 | lenp, ppos); |
| 88 | } | 95 | } |
| 89 | 96 | ||
| 97 | static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, | ||
| 98 | struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 99 | { | ||
| 100 | struct ctl_table ipc_table; | ||
| 101 | size_t lenp_bef = *lenp; | ||
| 102 | int oldval; | ||
| 103 | int rc; | ||
| 104 | |||
| 105 | memcpy(&ipc_table, table, sizeof(ipc_table)); | ||
| 106 | ipc_table.data = get_ipc(table); | ||
| 107 | oldval = *((int *)(ipc_table.data)); | ||
| 108 | |||
| 109 | rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); | ||
| 110 | |||
| 111 | if (write && !rc && lenp_bef == *lenp) { | ||
| 112 | int newval = *((int *)(ipc_table.data)); | ||
| 113 | /* | ||
| 114 | * The file "auto_msgmni" has correctly been set. | ||
| 115 | * React by (un)registering the corresponding tunable, if the | ||
| 116 | * value has changed. | ||
| 117 | */ | ||
| 118 | if (newval != oldval) | ||
| 119 | ipc_auto_callback(newval); | ||
| 120 | } | ||
| 121 | |||
| 122 | return rc; | ||
| 123 | } | ||
| 124 | |||
| 90 | #else | 125 | #else |
| 91 | #define proc_ipc_doulongvec_minmax NULL | 126 | #define proc_ipc_doulongvec_minmax NULL |
| 92 | #define proc_ipc_dointvec NULL | 127 | #define proc_ipc_dointvec NULL |
| 93 | #define proc_ipc_callback_dointvec NULL | 128 | #define proc_ipc_callback_dointvec NULL |
| 129 | #define proc_ipcauto_dointvec_minmax NULL | ||
| 94 | #endif | 130 | #endif |
| 95 | 131 | ||
| 96 | #ifdef CONFIG_SYSCTL_SYSCALL | 132 | #ifdef CONFIG_SYSCTL_SYSCALL |
| @@ -142,14 +178,11 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, | |||
| 142 | rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, | 178 | rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, |
| 143 | newlen); | 179 | newlen); |
| 144 | 180 | ||
| 145 | if (newval && newlen && rc > 0) { | 181 | if (newval && newlen && rc > 0) |
| 146 | /* | 182 | /* |
| 147 | * Tunable has successfully been changed from userland | 183 | * Tunable has successfully been changed from userland |
| 148 | */ | 184 | */ |
| 149 | int *data = get_ipc(table); | 185 | unregister_ipcns_notifier(current->nsproxy->ipc_ns); |
| 150 | |||
| 151 | tunable_set_callback(*data); | ||
| 152 | } | ||
| 153 | 186 | ||
| 154 | return rc; | 187 | return rc; |
| 155 | } | 188 | } |
| @@ -158,6 +191,9 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, | |||
| 158 | #define sysctl_ipc_registered_data NULL | 191 | #define sysctl_ipc_registered_data NULL |
| 159 | #endif | 192 | #endif |
| 160 | 193 | ||
| 194 | static int zero; | ||
| 195 | static int one = 1; | ||
| 196 | |||
| 161 | static struct ctl_table ipc_kern_table[] = { | 197 | static struct ctl_table ipc_kern_table[] = { |
| 162 | { | 198 | { |
| 163 | .ctl_name = KERN_SHMMAX, | 199 | .ctl_name = KERN_SHMMAX, |
| @@ -222,6 +258,16 @@ static struct ctl_table ipc_kern_table[] = { | |||
| 222 | .proc_handler = proc_ipc_dointvec, | 258 | .proc_handler = proc_ipc_dointvec, |
| 223 | .strategy = sysctl_ipc_data, | 259 | .strategy = sysctl_ipc_data, |
| 224 | }, | 260 | }, |
| 261 | { | ||
| 262 | .ctl_name = CTL_UNNUMBERED, | ||
| 263 | .procname = "auto_msgmni", | ||
| 264 | .data = &init_ipc_ns.auto_msgmni, | ||
| 265 | .maxlen = sizeof(int), | ||
| 266 | .mode = 0644, | ||
| 267 | .proc_handler = proc_ipcauto_dointvec_minmax, | ||
| 268 | .extra1 = &zero, | ||
| 269 | .extra2 = &one, | ||
| 270 | }, | ||
| 225 | {} | 271 | {} |
| 226 | }; | 272 | }; |
| 227 | 273 | ||
diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c index 70ff09183f7b..b9b31a4f77e1 100644 --- a/ipc/ipcns_notifier.c +++ b/ipc/ipcns_notifier.c | |||
| @@ -55,25 +55,35 @@ static int ipcns_callback(struct notifier_block *self, | |||
| 55 | 55 | ||
| 56 | int register_ipcns_notifier(struct ipc_namespace *ns) | 56 | int register_ipcns_notifier(struct ipc_namespace *ns) |
| 57 | { | 57 | { |
| 58 | int rc; | ||
| 59 | |||
| 58 | memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); | 60 | memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); |
| 59 | ns->ipcns_nb.notifier_call = ipcns_callback; | 61 | ns->ipcns_nb.notifier_call = ipcns_callback; |
| 60 | ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; | 62 | ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; |
| 61 | return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); | 63 | rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); |
| 64 | if (!rc) | ||
| 65 | ns->auto_msgmni = 1; | ||
| 66 | return rc; | ||
| 62 | } | 67 | } |
| 63 | 68 | ||
| 64 | int cond_register_ipcns_notifier(struct ipc_namespace *ns) | 69 | int cond_register_ipcns_notifier(struct ipc_namespace *ns) |
| 65 | { | 70 | { |
| 71 | int rc; | ||
| 72 | |||
| 66 | memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); | 73 | memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); |
| 67 | ns->ipcns_nb.notifier_call = ipcns_callback; | 74 | ns->ipcns_nb.notifier_call = ipcns_callback; |
| 68 | ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; | 75 | ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; |
| 69 | return blocking_notifier_chain_cond_register(&ipcns_chain, | 76 | rc = blocking_notifier_chain_cond_register(&ipcns_chain, |
| 70 | &ns->ipcns_nb); | 77 | &ns->ipcns_nb); |
| 78 | if (!rc) | ||
| 79 | ns->auto_msgmni = 1; | ||
| 80 | return rc; | ||
| 71 | } | 81 | } |
| 72 | 82 | ||
| 73 | int unregister_ipcns_notifier(struct ipc_namespace *ns) | 83 | void unregister_ipcns_notifier(struct ipc_namespace *ns) |
| 74 | { | 84 | { |
| 75 | return blocking_notifier_chain_unregister(&ipcns_chain, | 85 | blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb); |
| 76 | &ns->ipcns_nb); | 86 | ns->auto_msgmni = 0; |
| 77 | } | 87 | } |
| 78 | 88 | ||
| 79 | int ipcns_notify(unsigned long val) | 89 | int ipcns_notify(unsigned long val) |
