diff options
-rw-r--r-- | Documentation/sysctl/kernel.txt | 22 | ||||
-rw-r--r-- | include/linux/ipc_namespace.h | 7 | ||||
-rw-r--r-- | include/linux/shm.h | 4 | ||||
-rw-r--r-- | ipc/ipc_sysctl.c | 36 | ||||
-rw-r--r-- | ipc/shm.c | 97 | ||||
-rw-r--r-- | kernel/exit.c | 1 |
6 files changed, 163 insertions, 4 deletions
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 1c7fb0a94e28..704e474a93df 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt | |||
@@ -61,6 +61,7 @@ show up in /proc/sys/kernel: | |||
61 | - rtsig-nr | 61 | - rtsig-nr |
62 | - sem | 62 | - sem |
63 | - sg-big-buff [ generic SCSI device (sg) ] | 63 | - sg-big-buff [ generic SCSI device (sg) ] |
64 | - shm_rmid_forced | ||
64 | - shmall | 65 | - shmall |
65 | - shmmax [ sysv ipc ] | 66 | - shmmax [ sysv ipc ] |
66 | - shmmni | 67 | - shmmni |
@@ -518,6 +519,27 @@ kernel. This value defaults to SHMMAX. | |||
518 | 519 | ||
519 | ============================================================== | 520 | ============================================================== |
520 | 521 | ||
522 | shm_rmid_forced: | ||
523 | |||
524 | Linux lets you set resource limits, including how much memory one | ||
525 | process can consume, via setrlimit(2). Unfortunately, shared memory | ||
526 | segments are allowed to exist without association with any process, and | ||
527 | thus might not be counted against any resource limits. If enabled, | ||
528 | shared memory segments are automatically destroyed when their attach | ||
529 | count becomes zero after a detach or a process termination. It will | ||
530 | also destroy segments that were created, but never attached to, on exit | ||
531 | from the process. The only use left for IPC_RMID is to immediately | ||
532 | destroy an unattached segment. Of course, this breaks the way things are | ||
533 | defined, so some applications might stop working. Note that this | ||
534 | feature will do you no good unless you also configure your resource | ||
535 | limits (in particular, RLIMIT_AS and RLIMIT_NPROC). Most systems don't | ||
536 | need this. | ||
537 | |||
538 | Note that if you change this from 0 to 1, already created segments | ||
539 | without users and with a dead originating process will be destroyed. | ||
540 | |||
541 | ============================================================== | ||
542 | |||
521 | softlockup_thresh: | 543 | softlockup_thresh: |
522 | 544 | ||
523 | This value can be used to lower the softlockup tolerance threshold. The | 545 | This value can be used to lower the softlockup tolerance threshold. The |
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index a6d1655f9607..8a297a5e794c 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h | |||
@@ -44,6 +44,11 @@ struct ipc_namespace { | |||
44 | size_t shm_ctlall; | 44 | size_t shm_ctlall; |
45 | int shm_ctlmni; | 45 | int shm_ctlmni; |
46 | int shm_tot; | 46 | int shm_tot; |
47 | /* | ||
48 | * Defines whether IPC_RMID is forced for _all_ shm segments regardless | ||
49 | * of shmctl() | ||
50 | */ | ||
51 | int shm_rmid_forced; | ||
47 | 52 | ||
48 | struct notifier_block ipcns_nb; | 53 | struct notifier_block ipcns_nb; |
49 | 54 | ||
@@ -72,6 +77,7 @@ extern int register_ipcns_notifier(struct ipc_namespace *); | |||
72 | extern int cond_register_ipcns_notifier(struct ipc_namespace *); | 77 | extern int cond_register_ipcns_notifier(struct ipc_namespace *); |
73 | extern void unregister_ipcns_notifier(struct ipc_namespace *); | 78 | extern void unregister_ipcns_notifier(struct ipc_namespace *); |
74 | extern int ipcns_notify(unsigned long); | 79 | extern int ipcns_notify(unsigned long); |
80 | extern void shm_destroy_orphaned(struct ipc_namespace *ns); | ||
75 | #else /* CONFIG_SYSVIPC */ | 81 | #else /* CONFIG_SYSVIPC */ |
76 | static inline int register_ipcns_notifier(struct ipc_namespace *ns) | 82 | static inline int register_ipcns_notifier(struct ipc_namespace *ns) |
77 | { return 0; } | 83 | { return 0; } |
@@ -79,6 +85,7 @@ static inline int cond_register_ipcns_notifier(struct ipc_namespace *ns) | |||
79 | { return 0; } | 85 | { return 0; } |
80 | static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { } | 86 | static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { } |
81 | static inline int ipcns_notify(unsigned long l) { return 0; } | 87 | static inline int ipcns_notify(unsigned long l) { return 0; } |
88 | static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {} | ||
82 | #endif /* CONFIG_SYSVIPC */ | 89 | #endif /* CONFIG_SYSVIPC */ |
83 | 90 | ||
84 | #ifdef CONFIG_POSIX_MQUEUE | 91 | #ifdef CONFIG_POSIX_MQUEUE |
diff --git a/include/linux/shm.h b/include/linux/shm.h index eca6235a46c0..7d27ffde0190 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h | |||
@@ -106,6 +106,7 @@ struct shmid_kernel /* private to the kernel */ | |||
106 | #ifdef CONFIG_SYSVIPC | 106 | #ifdef CONFIG_SYSVIPC |
107 | long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr); | 107 | long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr); |
108 | extern int is_file_shm_hugepages(struct file *file); | 108 | extern int is_file_shm_hugepages(struct file *file); |
109 | extern void exit_shm(struct task_struct *task); | ||
109 | #else | 110 | #else |
110 | static inline long do_shmat(int shmid, char __user *shmaddr, | 111 | static inline long do_shmat(int shmid, char __user *shmaddr, |
111 | int shmflg, unsigned long *addr) | 112 | int shmflg, unsigned long *addr) |
@@ -116,6 +117,9 @@ static inline int is_file_shm_hugepages(struct file *file) | |||
116 | { | 117 | { |
117 | return 0; | 118 | return 0; |
118 | } | 119 | } |
120 | static inline void exit_shm(struct task_struct *task) | ||
121 | { | ||
122 | } | ||
119 | #endif | 123 | #endif |
120 | 124 | ||
121 | #endif /* __KERNEL__ */ | 125 | #endif /* __KERNEL__ */ |
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 56410faa4550..00fba2bab87d 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c | |||
@@ -31,12 +31,37 @@ static int proc_ipc_dointvec(ctl_table *table, int write, | |||
31 | void __user *buffer, size_t *lenp, loff_t *ppos) | 31 | void __user *buffer, size_t *lenp, loff_t *ppos) |
32 | { | 32 | { |
33 | struct ctl_table ipc_table; | 33 | struct ctl_table ipc_table; |
34 | |||
34 | memcpy(&ipc_table, table, sizeof(ipc_table)); | 35 | memcpy(&ipc_table, table, sizeof(ipc_table)); |
35 | ipc_table.data = get_ipc(table); | 36 | ipc_table.data = get_ipc(table); |
36 | 37 | ||
37 | return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); | 38 | return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); |
38 | } | 39 | } |
39 | 40 | ||
41 | static int proc_ipc_dointvec_minmax(ctl_table *table, int write, | ||
42 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
43 | { | ||
44 | struct ctl_table ipc_table; | ||
45 | |||
46 | memcpy(&ipc_table, table, sizeof(ipc_table)); | ||
47 | ipc_table.data = get_ipc(table); | ||
48 | |||
49 | return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); | ||
50 | } | ||
51 | |||
52 | static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, | ||
53 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
54 | { | ||
55 | struct ipc_namespace *ns = current->nsproxy->ipc_ns; | ||
56 | int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
57 | |||
58 | if (err < 0) | ||
59 | return err; | ||
60 | if (ns->shm_rmid_forced) | ||
61 | shm_destroy_orphaned(ns); | ||
62 | return err; | ||
63 | } | ||
64 | |||
40 | static int proc_ipc_callback_dointvec(ctl_table *table, int write, | 65 | static int proc_ipc_callback_dointvec(ctl_table *table, int write, |
41 | void __user *buffer, size_t *lenp, loff_t *ppos) | 66 | void __user *buffer, size_t *lenp, loff_t *ppos) |
42 | { | 67 | { |
@@ -125,6 +150,8 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, | |||
125 | #else | 150 | #else |
126 | #define proc_ipc_doulongvec_minmax NULL | 151 | #define proc_ipc_doulongvec_minmax NULL |
127 | #define proc_ipc_dointvec NULL | 152 | #define proc_ipc_dointvec NULL |
153 | #define proc_ipc_dointvec_minmax NULL | ||
154 | #define proc_ipc_dointvec_minmax_orphans NULL | ||
128 | #define proc_ipc_callback_dointvec NULL | 155 | #define proc_ipc_callback_dointvec NULL |
129 | #define proc_ipcauto_dointvec_minmax NULL | 156 | #define proc_ipcauto_dointvec_minmax NULL |
130 | #endif | 157 | #endif |
@@ -155,6 +182,15 @@ static struct ctl_table ipc_kern_table[] = { | |||
155 | .proc_handler = proc_ipc_dointvec, | 182 | .proc_handler = proc_ipc_dointvec, |
156 | }, | 183 | }, |
157 | { | 184 | { |
185 | .procname = "shm_rmid_forced", | ||
186 | .data = &init_ipc_ns.shm_rmid_forced, | ||
187 | .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), | ||
188 | .mode = 0644, | ||
189 | .proc_handler = proc_ipc_dointvec_minmax_orphans, | ||
190 | .extra1 = &zero, | ||
191 | .extra2 = &one, | ||
192 | }, | ||
193 | { | ||
158 | .procname = "msgmax", | 194 | .procname = "msgmax", |
159 | .data = &init_ipc_ns.msg_ctlmax, | 195 | .data = &init_ipc_ns.msg_ctlmax, |
160 | .maxlen = sizeof (init_ipc_ns.msg_ctlmax), | 196 | .maxlen = sizeof (init_ipc_ns.msg_ctlmax), |
@@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns) | |||
74 | ns->shm_ctlmax = SHMMAX; | 74 | ns->shm_ctlmax = SHMMAX; |
75 | ns->shm_ctlall = SHMALL; | 75 | ns->shm_ctlall = SHMALL; |
76 | ns->shm_ctlmni = SHMMNI; | 76 | ns->shm_ctlmni = SHMMNI; |
77 | ns->shm_rmid_forced = 0; | ||
77 | ns->shm_tot = 0; | 78 | ns->shm_tot = 0; |
78 | ipc_init_ids(&shm_ids(ns)); | 79 | ipc_init_ids(&shm_ids(ns)); |
79 | } | 80 | } |
@@ -187,6 +188,23 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) | |||
187 | } | 188 | } |
188 | 189 | ||
189 | /* | 190 | /* |
191 | * shm_may_destroy - identifies whether shm segment should be destroyed now | ||
192 | * | ||
193 | * Returns true if and only if there are no active users of the segment and | ||
194 | * one of the following is true: | ||
195 | * | ||
196 | * 1) shmctl(id, IPC_RMID, NULL) was called for this shp | ||
197 | * | ||
198 | * 2) sysctl kernel.shm_rmid_forced is set to 1. | ||
199 | */ | ||
200 | static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) | ||
201 | { | ||
202 | return (shp->shm_nattch == 0) && | ||
203 | (ns->shm_rmid_forced || | ||
204 | (shp->shm_perm.mode & SHM_DEST)); | ||
205 | } | ||
206 | |||
207 | /* | ||
190 | * remove the attach descriptor vma. | 208 | * remove the attach descriptor vma. |
191 | * free memory for segment if it is marked destroyed. | 209 | * free memory for segment if it is marked destroyed. |
192 | * The descriptor has already been removed from the current->mm->mmap list | 210 | * The descriptor has already been removed from the current->mm->mmap list |
@@ -206,11 +224,83 @@ static void shm_close(struct vm_area_struct *vma) | |||
206 | shp->shm_lprid = task_tgid_vnr(current); | 224 | shp->shm_lprid = task_tgid_vnr(current); |
207 | shp->shm_dtim = get_seconds(); | 225 | shp->shm_dtim = get_seconds(); |
208 | shp->shm_nattch--; | 226 | shp->shm_nattch--; |
209 | if(shp->shm_nattch == 0 && | 227 | if (shm_may_destroy(ns, shp)) |
210 | shp->shm_perm.mode & SHM_DEST) | 228 | shm_destroy(ns, shp); |
229 | else | ||
230 | shm_unlock(shp); | ||
231 | up_write(&shm_ids(ns).rw_mutex); | ||
232 | } | ||
233 | |||
234 | static int shm_try_destroy_current(int id, void *p, void *data) | ||
235 | { | ||
236 | struct ipc_namespace *ns = data; | ||
237 | struct shmid_kernel *shp = shm_lock(ns, id); | ||
238 | |||
239 | if (IS_ERR(shp)) | ||
240 | return 0; | ||
241 | |||
242 | if (shp->shm_cprid != task_tgid_vnr(current)) { | ||
243 | shm_unlock(shp); | ||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | if (shm_may_destroy(ns, shp)) | ||
248 | shm_destroy(ns, shp); | ||
249 | else | ||
250 | shm_unlock(shp); | ||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | static int shm_try_destroy_orphaned(int id, void *p, void *data) | ||
255 | { | ||
256 | struct ipc_namespace *ns = data; | ||
257 | struct shmid_kernel *shp = shm_lock(ns, id); | ||
258 | struct task_struct *task; | ||
259 | |||
260 | if (IS_ERR(shp)) | ||
261 | return 0; | ||
262 | |||
263 | /* | ||
264 | * We want to destroy segments that have no users and whose | ||
265 | * originating process has already exited. | ||
266 | * | ||
267 | * XXX: the originating process may exist in another pid namespace. | ||
268 | */ | ||
269 | task = find_task_by_vpid(shp->shm_cprid); | ||
270 | if (task != NULL) { | ||
271 | shm_unlock(shp); | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | if (shm_may_destroy(ns, shp)) | ||
211 | shm_destroy(ns, shp); | 276 | shm_destroy(ns, shp); |
212 | else | 277 | else |
213 | shm_unlock(shp); | 278 | shm_unlock(shp); |
279 | return 0; | ||
280 | } | ||
281 | |||
282 | void shm_destroy_orphaned(struct ipc_namespace *ns) | ||
283 | { | ||
284 | down_write(&shm_ids(ns).rw_mutex); | ||
285 | idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); | ||
286 | up_write(&shm_ids(ns).rw_mutex); | ||
287 | } | ||
288 | |||
289 | |||
290 | void exit_shm(struct task_struct *task) | ||
291 | { | ||
292 | struct nsproxy *nsp = task->nsproxy; | ||
293 | struct ipc_namespace *ns; | ||
294 | |||
295 | if (!nsp) | ||
296 | return; | ||
297 | ns = nsp->ipc_ns; | ||
298 | if (!ns || !ns->shm_rmid_forced) | ||
299 | return; | ||
300 | |||
301 | /* Destroy all already-created segments that are not mapped yet */ | ||
302 | down_write(&shm_ids(ns).rw_mutex); | ||
303 | idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); | ||
214 | up_write(&shm_ids(ns).rw_mutex); | 304 | up_write(&shm_ids(ns).rw_mutex); |
215 | } | 305 | } |
216 | 306 | ||
@@ -950,8 +1040,7 @@ out_nattch: | |||
950 | shp = shm_lock(ns, shmid); | 1040 | shp = shm_lock(ns, shmid); |
951 | BUG_ON(IS_ERR(shp)); | 1041 | BUG_ON(IS_ERR(shp)); |
952 | shp->shm_nattch--; | 1042 | shp->shm_nattch--; |
953 | if(shp->shm_nattch == 0 && | 1043 | if (shm_may_destroy(ns, shp)) |
954 | shp->shm_perm.mode & SHM_DEST) | ||
955 | shm_destroy(ns, shp); | 1044 | shm_destroy(ns, shp); |
956 | else | 1045 | else |
957 | shm_unlock(shp); | 1046 | shm_unlock(shp); |
diff --git a/kernel/exit.c b/kernel/exit.c index 9ee58bb9e60f..2913b3509d42 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -980,6 +980,7 @@ NORET_TYPE void do_exit(long code) | |||
980 | trace_sched_process_exit(tsk); | 980 | trace_sched_process_exit(tsk); |
981 | 981 | ||
982 | exit_sem(tsk); | 982 | exit_sem(tsk); |
983 | exit_shm(tsk); | ||
983 | exit_files(tsk); | 984 | exit_files(tsk); |
984 | exit_fs(tsk); | 985 | exit_fs(tsk); |
985 | check_stack_usage(); | 986 | check_stack_usage(); |