aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2007-02-20 16:57:53 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-02-20 20:10:13 -0500
commitbc56bba8f31bd99f350a5ebfd43d50f411b620c7 (patch)
tree68213ce8da2f7af8e3f39b77c078d6162776a95c
parent8ef8286689c6b5bc76212437b85bdd2ba749ee44 (diff)
[PATCH] shm: make sysv ipc shared memory use stacked files
The current ipc shared memory code runs into several problems because it does not quite use files like the rest of the kernel. With the option of backing ipc shared memory with either hugetlbfs or ordinary shared memory the problems got worse. With the added support for ipc namespaces things behaved so unexpectedly that we now have several bad namespace reference counting bugs when using what appears at first glance to be a reasonable idiom. So to attack these problems and hopefully make the code more maintainable this patch simply uses the files provided by other parts of the kernel and builds its own files out of them. The shm files are allocated in do_shmat and freed when their reference count drops to zero with their last unmap. The file and vm operations that we don't want to implement or we don't implement completely we just delegate to the operations of our backing file. This means that we now get an accurate shm_nattch count when we have a hugetlbfs inode for backing store, and the shm accounting of last attach and last detach time work as well. This means that getting a reference to the ipc namespace when we create the file and dropping the reference in the release method is now safe and correct. This means we no longer need a special case for clearing VM_MAYWRITE as our file descriptor now only has write permissions when we have requested write access when calling shmat. Although VM_SHARED is now cleared as well which I believe is harmless and is most likely a minor bug fix. By using the same set of operations for both the hugetlb case and regular shared memory case shmdt is now simplified and made slightly more correct as now the test "vma->vm_ops == &shm_vm_ops" is 100% accurate in spotting all shared memory regions generated from sysvipc shared memory. Signed-off-by: Eric W. 
Biederman <ebiederm@xmission.com> Cc: Michal Piotrowski <michal.k.k.piotrowski@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--ipc/shm.c240
1 file changed, 155 insertions, 85 deletions
diff --git a/ipc/shm.c b/ipc/shm.c
index 5bb617f6306e..eb57e2254304 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -37,11 +37,21 @@
37#include <linux/seq_file.h> 37#include <linux/seq_file.h>
38#include <linux/mutex.h> 38#include <linux/mutex.h>
39#include <linux/nsproxy.h> 39#include <linux/nsproxy.h>
40#include <linux/mount.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42 43
43#include "util.h" 44#include "util.h"
44 45
46struct shm_file_data {
47 int id;
48 struct ipc_namespace *ns;
49 struct file *file;
50 const struct vm_operations_struct *vm_ops;
51};
52
53#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
54
45static const struct file_operations shm_file_operations; 55static const struct file_operations shm_file_operations;
46static struct vm_operations_struct shm_vm_ops; 56static struct vm_operations_struct shm_vm_ops;
47 57
@@ -60,8 +70,8 @@ static struct ipc_ids init_shm_ids;
60 70
61static int newseg (struct ipc_namespace *ns, key_t key, 71static int newseg (struct ipc_namespace *ns, key_t key,
62 int shmflg, size_t size); 72 int shmflg, size_t size);
63static void shm_open (struct vm_area_struct *shmd); 73static void shm_open(struct vm_area_struct *vma);
64static void shm_close (struct vm_area_struct *shmd); 74static void shm_close(struct vm_area_struct *vma);
65static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp); 75static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
66#ifdef CONFIG_PROC_FS 76#ifdef CONFIG_PROC_FS
67static int sysvipc_shm_proc_show(struct seq_file *s, void *it); 77static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
@@ -150,11 +160,14 @@ static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
150 160
151 161
152 162
153static inline void shm_inc(struct ipc_namespace *ns, int id) 163/* This is called by fork, once for every shm attach. */
164static void shm_open(struct vm_area_struct *vma)
154{ 165{
166 struct file *file = vma->vm_file;
167 struct shm_file_data *sfd = shm_file_data(file);
155 struct shmid_kernel *shp; 168 struct shmid_kernel *shp;
156 169
157 shp = shm_lock(ns, id); 170 shp = shm_lock(sfd->ns, sfd->id);
158 BUG_ON(!shp); 171 BUG_ON(!shp);
159 shp->shm_atim = get_seconds(); 172 shp->shm_atim = get_seconds();
160 shp->shm_lprid = current->tgid; 173 shp->shm_lprid = current->tgid;
@@ -162,15 +175,6 @@ static inline void shm_inc(struct ipc_namespace *ns, int id)
162 shm_unlock(shp); 175 shm_unlock(shp);
163} 176}
164 177
165#define shm_file_ns(file) (*((struct ipc_namespace **)&(file)->private_data))
166
167/* This is called by fork, once for every shm attach. */
168static void shm_open(struct vm_area_struct *shmd)
169{
170 shm_inc(shm_file_ns(shmd->vm_file),
171 shmd->vm_file->f_path.dentry->d_inode->i_ino);
172}
173
174/* 178/*
175 * shm_destroy - free the struct shmid_kernel 179 * shm_destroy - free the struct shmid_kernel
176 * 180 *
@@ -195,23 +199,21 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
195} 199}
196 200
197/* 201/*
198 * remove the attach descriptor shmd. 202 * remove the attach descriptor vma.
199 * free memory for segment if it is marked destroyed. 203 * free memory for segment if it is marked destroyed.
200 * The descriptor has already been removed from the current->mm->mmap list 204 * The descriptor has already been removed from the current->mm->mmap list
201 * and will later be kfree()d. 205 * and will later be kfree()d.
202 */ 206 */
203static void shm_close (struct vm_area_struct *shmd) 207static void shm_close(struct vm_area_struct *vma)
204{ 208{
205 struct file * file = shmd->vm_file; 209 struct file * file = vma->vm_file;
206 int id = file->f_path.dentry->d_inode->i_ino; 210 struct shm_file_data *sfd = shm_file_data(file);
207 struct shmid_kernel *shp; 211 struct shmid_kernel *shp;
208 struct ipc_namespace *ns; 212 struct ipc_namespace *ns = sfd->ns;
209
210 ns = shm_file_ns(file);
211 213
212 mutex_lock(&shm_ids(ns).mutex); 214 mutex_lock(&shm_ids(ns).mutex);
213 /* remove from the list of attaches of the shm segment */ 215 /* remove from the list of attaches of the shm segment */
214 shp = shm_lock(ns, id); 216 shp = shm_lock(ns, sfd->id);
215 BUG_ON(!shp); 217 BUG_ON(!shp);
216 shp->shm_lprid = current->tgid; 218 shp->shm_lprid = current->tgid;
217 shp->shm_dtim = get_seconds(); 219 shp->shm_dtim = get_seconds();
@@ -224,46 +226,91 @@ static void shm_close (struct vm_area_struct *shmd)
224 mutex_unlock(&shm_ids(ns).mutex); 226 mutex_unlock(&shm_ids(ns).mutex);
225} 227}
226 228
229struct page *shm_nopage(struct vm_area_struct *vma, unsigned long address,
230 int *type)
231{
232 struct file *file = vma->vm_file;
233 struct shm_file_data *sfd = shm_file_data(file);
234
235 return sfd->vm_ops->nopage(vma, address, type);
236}
237
238#ifdef CONFIG_NUMA
239int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
240{
241 struct file *file = vma->vm_file;
242 struct shm_file_data *sfd = shm_file_data(file);
243 int err = 0;
244 if (sfd->vm_ops->set_policy)
245 err = sfd->vm_ops->set_policy(vma, new);
246 return err;
247}
248
249struct mempolicy *shm_get_policy(struct vm_area_struct *vma, unsigned long addr)
250{
251 struct file *file = vma->vm_file;
252 struct shm_file_data *sfd = shm_file_data(file);
253 struct mempolicy *pol = NULL;
254
255 if (sfd->vm_ops->get_policy)
256 pol = sfd->vm_ops->get_policy(vma, addr);
257 else
258 pol = vma->vm_policy;
259 return pol;
260}
261#endif
262
227static int shm_mmap(struct file * file, struct vm_area_struct * vma) 263static int shm_mmap(struct file * file, struct vm_area_struct * vma)
228{ 264{
265 struct shm_file_data *sfd = shm_file_data(file);
229 int ret; 266 int ret;
230 267
231 ret = shmem_mmap(file, vma); 268 ret = sfd->file->f_op->mmap(sfd->file, vma);
232 if (ret == 0) { 269 if (ret != 0)
233 vma->vm_ops = &shm_vm_ops; 270 return ret;
234 if (!(vma->vm_flags & VM_WRITE)) 271 sfd->vm_ops = vma->vm_ops;
235 vma->vm_flags &= ~VM_MAYWRITE; 272 vma->vm_ops = &shm_vm_ops;
236 shm_inc(shm_file_ns(file), file->f_path.dentry->d_inode->i_ino); 273 shm_open(vma);
237 }
238 274
239 return ret; 275 return ret;
240} 276}
241 277
242static int shm_release(struct inode *ino, struct file *file) 278static int shm_release(struct inode *ino, struct file *file)
243{ 279{
244 struct ipc_namespace *ns; 280 struct shm_file_data *sfd = shm_file_data(file);
245 281
246 ns = shm_file_ns(file); 282 put_ipc_ns(sfd->ns);
247 put_ipc_ns(ns); 283 shm_file_data(file) = NULL;
248 shm_file_ns(file) = NULL; 284 kfree(sfd);
249 return 0; 285 return 0;
250} 286}
251 287
288#ifndef CONFIG_MMU
289static unsigned long shm_get_unmapped_area(struct file *file,
290 unsigned long addr, unsigned long len, unsigned long pgoff,
291 unsigned long flags)
292{
293 struct shm_file_data *sfd = shm_file_data(file);
294 return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len, pgoff,
295 flags);
296}
297#else
298#define shm_get_unmapped_area NULL
299#endif
300
252static const struct file_operations shm_file_operations = { 301static const struct file_operations shm_file_operations = {
253 .mmap = shm_mmap, 302 .mmap = shm_mmap,
254 .release = shm_release, 303 .release = shm_release,
255#ifndef CONFIG_MMU 304 .get_unmapped_area = shm_get_unmapped_area,
256 .get_unmapped_area = shmem_get_unmapped_area,
257#endif
258}; 305};
259 306
260static struct vm_operations_struct shm_vm_ops = { 307static struct vm_operations_struct shm_vm_ops = {
261 .open = shm_open, /* callback for a new vm-area open */ 308 .open = shm_open, /* callback for a new vm-area open */
262 .close = shm_close, /* callback for when the vm-area is released */ 309 .close = shm_close, /* callback for when the vm-area is released */
263 .nopage = shmem_nopage, 310 .nopage = shm_nopage,
264#if defined(CONFIG_NUMA) && defined(CONFIG_SHMEM) 311#if defined(CONFIG_NUMA)
265 .set_policy = shmem_set_policy, 312 .set_policy = shm_set_policy,
266 .get_policy = shmem_get_policy, 313 .get_policy = shm_get_policy,
267#endif 314#endif
268}; 315};
269 316
@@ -330,13 +377,6 @@ static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
330 shp->shm_nattch = 0; 377 shp->shm_nattch = 0;
331 shp->id = shm_buildid(ns, id, shp->shm_perm.seq); 378 shp->id = shm_buildid(ns, id, shp->shm_perm.seq);
332 shp->shm_file = file; 379 shp->shm_file = file;
333 file->f_path.dentry->d_inode->i_ino = shp->id;
334
335 shm_file_ns(file) = get_ipc_ns(ns);
336
337 /* Hugetlb ops would have already been assigned. */
338 if (!(shmflg & SHM_HUGETLB))
339 file->f_op = &shm_file_operations;
340 380
341 ns->shm_tot += numpages; 381 ns->shm_tot += numpages;
342 shm_unlock(shp); 382 shm_unlock(shp);
@@ -607,10 +647,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
607 tbuf.shm_ctime = shp->shm_ctim; 647 tbuf.shm_ctime = shp->shm_ctim;
608 tbuf.shm_cpid = shp->shm_cprid; 648 tbuf.shm_cpid = shp->shm_cprid;
609 tbuf.shm_lpid = shp->shm_lprid; 649 tbuf.shm_lpid = shp->shm_lprid;
610 if (!is_file_hugepages(shp->shm_file)) 650 tbuf.shm_nattch = shp->shm_nattch;
611 tbuf.shm_nattch = shp->shm_nattch;
612 else
613 tbuf.shm_nattch = file_count(shp->shm_file) - 1;
614 shm_unlock(shp); 651 shm_unlock(shp);
615 if(copy_shmid_to_user (buf, &tbuf, version)) 652 if(copy_shmid_to_user (buf, &tbuf, version))
616 err = -EFAULT; 653 err = -EFAULT;
@@ -779,13 +816,16 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
779 unsigned long flags; 816 unsigned long flags;
780 unsigned long prot; 817 unsigned long prot;
781 int acc_mode; 818 int acc_mode;
782 void *user_addr; 819 unsigned long user_addr;
783 struct ipc_namespace *ns; 820 struct ipc_namespace *ns;
821 struct shm_file_data *sfd;
822 struct path path;
823 mode_t f_mode;
784 824
785 if (shmid < 0) { 825 err = -EINVAL;
786 err = -EINVAL; 826 if (shmid < 0)
787 goto out; 827 goto out;
788 } else if ((addr = (ulong)shmaddr)) { 828 else if ((addr = (ulong)shmaddr)) {
789 if (addr & (SHMLBA-1)) { 829 if (addr & (SHMLBA-1)) {
790 if (shmflg & SHM_RND) 830 if (shmflg & SHM_RND)
791 addr &= ~(SHMLBA-1); /* round down */ 831 addr &= ~(SHMLBA-1); /* round down */
@@ -793,12 +833,12 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
793#ifndef __ARCH_FORCE_SHMLBA 833#ifndef __ARCH_FORCE_SHMLBA
794 if (addr & ~PAGE_MASK) 834 if (addr & ~PAGE_MASK)
795#endif 835#endif
796 return -EINVAL; 836 goto out;
797 } 837 }
798 flags = MAP_SHARED | MAP_FIXED; 838 flags = MAP_SHARED | MAP_FIXED;
799 } else { 839 } else {
800 if ((shmflg & SHM_REMAP)) 840 if ((shmflg & SHM_REMAP))
801 return -EINVAL; 841 goto out;
802 842
803 flags = MAP_SHARED; 843 flags = MAP_SHARED;
804 } 844 }
@@ -806,9 +846,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
806 if (shmflg & SHM_RDONLY) { 846 if (shmflg & SHM_RDONLY) {
807 prot = PROT_READ; 847 prot = PROT_READ;
808 acc_mode = S_IRUGO; 848 acc_mode = S_IRUGO;
849 f_mode = FMODE_READ;
809 } else { 850 } else {
810 prot = PROT_READ | PROT_WRITE; 851 prot = PROT_READ | PROT_WRITE;
811 acc_mode = S_IRUGO | S_IWUGO; 852 acc_mode = S_IRUGO | S_IWUGO;
853 f_mode = FMODE_READ | FMODE_WRITE;
812 } 854 }
813 if (shmflg & SHM_EXEC) { 855 if (shmflg & SHM_EXEC) {
814 prot |= PROT_EXEC; 856 prot |= PROT_EXEC;
@@ -821,35 +863,50 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
821 */ 863 */
822 ns = current->nsproxy->ipc_ns; 864 ns = current->nsproxy->ipc_ns;
823 shp = shm_lock(ns, shmid); 865 shp = shm_lock(ns, shmid);
824 if(shp == NULL) { 866 if(shp == NULL)
825 err = -EINVAL;
826 goto out; 867 goto out;
827 } 868
828 err = shm_checkid(ns, shp,shmid); 869 err = shm_checkid(ns, shp,shmid);
829 if (err) { 870 if (err)
830 shm_unlock(shp); 871 goto out_unlock;
831 goto out; 872
832 } 873 err = -EACCES;
833 if (ipcperms(&shp->shm_perm, acc_mode)) { 874 if (ipcperms(&shp->shm_perm, acc_mode))
834 shm_unlock(shp); 875 goto out_unlock;
835 err = -EACCES;
836 goto out;
837 }
838 876
839 err = security_shm_shmat(shp, shmaddr, shmflg); 877 err = security_shm_shmat(shp, shmaddr, shmflg);
840 if (err) { 878 if (err)
841 shm_unlock(shp); 879 goto out_unlock;
842 return err; 880
843 } 881 path.dentry = dget(shp->shm_file->f_path.dentry);
844 882 path.mnt = mntget(shp->shm_file->f_path.mnt);
845 file = shp->shm_file;
846 size = i_size_read(file->f_path.dentry->d_inode);
847 shp->shm_nattch++; 883 shp->shm_nattch++;
884 size = i_size_read(path.dentry->d_inode);
848 shm_unlock(shp); 885 shm_unlock(shp);
849 886
887 err = -ENOMEM;
888 sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
889 if (!sfd)
890 goto out_put_path;
891
892 err = -ENOMEM;
893 file = get_empty_filp();
894 if (!file)
895 goto out_free;
896
897 file->f_op = &shm_file_operations;
898 file->private_data = sfd;
899 file->f_path = path;
900 file->f_mapping = shp->shm_file->f_mapping;
901 file->f_mode = f_mode;
902 sfd->id = shp->id;
903 sfd->ns = get_ipc_ns(ns);
904 sfd->file = shp->shm_file;
905 sfd->vm_ops = NULL;
906
850 down_write(&current->mm->mmap_sem); 907 down_write(&current->mm->mmap_sem);
851 if (addr && !(shmflg & SHM_REMAP)) { 908 if (addr && !(shmflg & SHM_REMAP)) {
852 user_addr = ERR_PTR(-EINVAL); 909 err = -EINVAL;
853 if (find_vma_intersection(current->mm, addr, addr + size)) 910 if (find_vma_intersection(current->mm, addr, addr + size))
854 goto invalid; 911 goto invalid;
855 /* 912 /*
@@ -861,11 +918,17 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
861 goto invalid; 918 goto invalid;
862 } 919 }
863 920
864 user_addr = (void*) do_mmap (file, addr, size, prot, flags, 0); 921 user_addr = do_mmap (file, addr, size, prot, flags, 0);
865 922 *raddr = user_addr;
923 err = 0;
924 if (IS_ERR_VALUE(user_addr))
925 err = (long)user_addr;
866invalid: 926invalid:
867 up_write(&current->mm->mmap_sem); 927 up_write(&current->mm->mmap_sem);
868 928
929 fput(file);
930
931out_nattch:
869 mutex_lock(&shm_ids(ns).mutex); 932 mutex_lock(&shm_ids(ns).mutex);
870 shp = shm_lock(ns, shmid); 933 shp = shm_lock(ns, shmid);
871 BUG_ON(!shp); 934 BUG_ON(!shp);
@@ -877,12 +940,19 @@ invalid:
877 shm_unlock(shp); 940 shm_unlock(shp);
878 mutex_unlock(&shm_ids(ns).mutex); 941 mutex_unlock(&shm_ids(ns).mutex);
879 942
880 *raddr = (unsigned long) user_addr;
881 err = 0;
882 if (IS_ERR(user_addr))
883 err = PTR_ERR(user_addr);
884out: 943out:
885 return err; 944 return err;
945
946out_unlock:
947 shm_unlock(shp);
948 goto out;
949
950out_free:
951 kfree(sfd);
952out_put_path:
953 dput(path.dentry);
954 mntput(path.mnt);
955 goto out_nattch;
886} 956}
887 957
888asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg) 958asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg)
@@ -944,7 +1014,7 @@ asmlinkage long sys_shmdt(char __user *shmaddr)
944 * a fragment created by mprotect() and/or munmap(), or it 1014 * a fragment created by mprotect() and/or munmap(), or it
945 * otherwise it starts at this address with no hassles. 1015 * otherwise it starts at this address with no hassles.
946 */ 1016 */
947 if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && 1017 if ((vma->vm_ops == &shm_vm_ops) &&
948 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) { 1018 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
949 1019
950 1020
@@ -973,7 +1043,7 @@ asmlinkage long sys_shmdt(char __user *shmaddr)
973 next = vma->vm_next; 1043 next = vma->vm_next;
974 1044
975 /* finding a matching vma now does not alter retval */ 1045 /* finding a matching vma now does not alter retval */
976 if ((vma->vm_ops == &shm_vm_ops || is_vm_hugetlb_page(vma)) && 1046 if ((vma->vm_ops == &shm_vm_ops) &&
977 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) 1047 (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff)
978 1048
979 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); 1049 do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
@@ -1004,7 +1074,7 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1004 shp->shm_segsz, 1074 shp->shm_segsz,
1005 shp->shm_cprid, 1075 shp->shm_cprid,
1006 shp->shm_lprid, 1076 shp->shm_lprid,
1007 is_file_hugepages(shp->shm_file) ? (file_count(shp->shm_file) - 1) : shp->shm_nattch, 1077 shp->shm_nattch,
1008 shp->shm_perm.uid, 1078 shp->shm_perm.uid,
1009 shp->shm_perm.gid, 1079 shp->shm_perm.gid,
1010 shp->shm_perm.cuid, 1080 shp->shm_perm.cuid,