author     Linus Torvalds <torvalds@linux-foundation.org>  2013-05-01 20:51:54 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-05-01 20:51:54 -0400
commit     20b4fb485227404329e41ad15588afad3df23050 (patch)
tree       f3e099f0ab3da8a93b447203e294d2bb22f6dc05 /fs
parent     b9394d8a657cd3c064fa432aa0905c1b58b38fe9 (diff)
parent     ac3e3c5b1164397656df81b9e9ab4991184d3236 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull VFS updates from Al Viro,

Misc cleanups all over the place, mainly wrt /proc interfaces (switch
create_proc_entry to proc_create(), get rid of the deprecated
create_proc_read_entry() in favor of using proc_create_data() and
seq_file etc).

7kloc removed.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (204 commits)
  don't bother with deferred freeing of fdtables
  proc: Move non-public stuff from linux/proc_fs.h to fs/proc/internal.h
  proc: Make the PROC_I() and PDE() macros internal to procfs
  proc: Supply a function to remove a proc entry by PDE
  take cgroup_open() and cpuset_open() to fs/proc/base.c
  ppc: Clean up scanlog
  ppc: Clean up rtas_flash driver somewhat
  hostap: proc: Use remove_proc_subtree()
  drm: proc: Use remove_proc_subtree()
  drm: proc: Use minor->index to label things, not PDE->name
  drm: Constify drm_proc_list[]
  zoran: Don't print proc_dir_entry data in debug
  reiserfs: Don't access the proc_dir_entry in r_open(), r_start() r_show()
  proc: Supply an accessor for getting the data from a PDE's parent
  airo: Use remove_proc_subtree()
  rtl8192u: Don't need to save device proc dir PDE
  rtl8187se: Use a dir under /proc/net/r8180/
  proc: Add proc_mkdir_data()
  proc: Move some bits from linux/proc_fs.h to linux/{of.h,signal.h,tty.h}
  proc: Move PDE_NET() to fs/proc/proc_net.c
  ...
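Most of the per-filesystem churn below is a mechanical move from create_proc_entry()/create_proc_read_entry() and direct PDE(inode)->data access to proc_create_data(), PDE_DATA(inode) and seq_file, with remove_proc_subtree() for teardown. The following is a minimal sketch of that registration style as a hypothetical out-of-tree module; the "demo" names and the /proc/demo/state path are invented for illustration, and it targets the file_operations-based proc interface of this era (long before proc_ops):

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Hypothetical per-object state handed to proc_create_data(). */
struct demo_state {
	int value;
};

static struct demo_state demo = { .value = 42 };
static struct proc_dir_entry *demo_dir;

static int demo_show(struct seq_file *m, void *v)
{
	struct demo_state *st = m->private;

	seq_printf(m, "value: %d\n", st->value);
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	/* PDE_DATA() replaces reaching into PDE(inode)->data directly. */
	return single_open(file, demo_show, PDE_DATA(inode));
}

static const struct file_operations demo_fops = {
	.owner   = THIS_MODULE,
	.open    = demo_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init demo_init(void)
{
	demo_dir = proc_mkdir_data("demo", 0, NULL, &demo);
	if (!demo_dir)
		return -ENOMEM;
	if (!proc_create_data("state", 0444, demo_dir, &demo_fops, &demo)) {
		remove_proc_subtree("demo", NULL);
		return -ENOMEM;
	}
	return 0;
}

static void __exit demo_exit(void)
{
	/* remove_proc_subtree() tears down the whole directory in one call. */
	remove_proc_subtree("demo", NULL);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Reading /proc/demo/state in this sketch goes entirely through seq_file and never touches struct proc_dir_entry, which is what lets this series make PROC_I() and PDE() private to fs/proc.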
Diffstat (limited to 'fs')
-rw-r--r--  fs/Makefile | 2
-rw-r--r--  fs/afs/proc.c | 8
-rw-r--r--  fs/aio.c | 4
-rw-r--r--  fs/binfmt_aout.c | 25
-rw-r--r--  fs/binfmt_elf_fdpic.c | 7
-rw-r--r--  fs/binfmt_flat.c | 37
-rw-r--r--  fs/btrfs/file.c | 3
-rw-r--r--  fs/cachefiles/rdwr.c | 2
-rw-r--r--  fs/cifs/file.c | 3
-rw-r--r--  fs/coda/file.c | 2
-rw-r--r--  fs/compat.c | 184
-rw-r--r--  fs/coredump.c | 6
-rw-r--r--  fs/efivarfs/file.c | 1
-rw-r--r--  fs/efivarfs/inode.c | 1
-rw-r--r--  fs/efivarfs/super.c | 2
-rw-r--r--  fs/exec.c | 9
-rw-r--r--  fs/ext4/mballoc.c | 2
-rw-r--r--  fs/ext4/super.c | 2
-rw-r--r--  fs/f2fs/acl.c | 2
-rw-r--r--  fs/f2fs/dir.c | 2
-rw-r--r--  fs/f2fs/file.c | 4
-rw-r--r--  fs/fifo.c | 153
-rw-r--r--  fs/file.c | 68
-rw-r--r--  fs/fuse/dev.c | 2
-rw-r--r--  fs/fuse/file.c | 2
-rw-r--r--  fs/hpfs/file.c | 36
-rw-r--r--  fs/hppfs/hppfs.c | 20
-rw-r--r--  fs/inode.c | 2
-rw-r--r--  fs/internal.h | 5
-rw-r--r--  fs/jbd2/journal.c | 2
-rw-r--r--  fs/mount.h | 7
-rw-r--r--  fs/namespace.c | 341
-rw-r--r--  fs/nfsd/nfsctl.c | 4
-rw-r--r--  fs/notify/inotify/inotify_user.c | 3
-rw-r--r--  fs/ntfs/file.c | 2
-rw-r--r--  fs/ocfs2/file.c | 9
-rw-r--r--  fs/pipe.c | 458
-rw-r--r--  fs/pnode.c | 10
-rw-r--r--  fs/pnode.h | 7
-rw-r--r--  fs/proc/base.c | 56
-rw-r--r--  fs/proc/fd.h | 5
-rw-r--r--  fs/proc/generic.c | 377
-rw-r--r--  fs/proc/inode.c | 283
-rw-r--r--  fs/proc/internal.h | 313
-rw-r--r--  fs/proc/kcore.c | 2
-rw-r--r--  fs/proc/namespaces.c | 17
-rw-r--r--  fs/proc/proc_devtree.c | 2
-rw-r--r--  fs/proc/proc_net.c | 4
-rw-r--r--  fs/proc/root.c | 2
-rw-r--r--  fs/proc/self.c | 47
-rw-r--r--  fs/proc/vmcore.c | 5
-rw-r--r--  fs/read_write.c | 205
-rw-r--r--  fs/read_write.h | 5
-rw-r--r--  fs/reiserfs/file.c | 61
-rw-r--r--  fs/reiserfs/procfs.c | 62
-rw-r--r--  fs/seq_file.c | 18
-rw-r--r--  fs/splice.c | 14
-rw-r--r--  fs/xfs/xfs_file.c | 3
58 files changed, 1307 insertions, 1613 deletions
diff --git a/fs/Makefile b/fs/Makefile
index f0db9c941a5f..4fe6df3ec28f 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -7,7 +7,7 @@
7 7
8obj-y := open.o read_write.o file_table.o super.o \ 8obj-y := open.o read_write.o file_table.o super.o \
9 char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ 9 char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \
10 ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ 10 ioctl.o readdir.o select.o dcache.o inode.o \
11 attr.o bad_inode.o file.o filesystems.o namespace.o \ 11 attr.o bad_inode.o file.o filesystems.o namespace.o \
12 seq_file.o xattr.o libfs.o fs-writeback.o \ 12 seq_file.o xattr.o libfs.o fs-writeback.o \
13 pnode.o splice.o sync.o utimes.o \ 13 pnode.o splice.o sync.o utimes.o \
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 096b23f821a1..526e4bbbde59 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -190,7 +190,7 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file)
190 return ret; 190 return ret;
191 191
192 m = file->private_data; 192 m = file->private_data;
193 m->private = PDE(inode)->data; 193 m->private = PDE_DATA(inode);
194 194
195 return 0; 195 return 0;
196} 196}
@@ -448,7 +448,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file)
448 struct seq_file *m; 448 struct seq_file *m;
449 int ret; 449 int ret;
450 450
451 cell = PDE(inode)->data; 451 cell = PDE_DATA(inode);
452 if (!cell) 452 if (!cell)
453 return -ENOENT; 453 return -ENOENT;
454 454
@@ -554,7 +554,7 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file)
554 struct seq_file *m; 554 struct seq_file *m;
555 int ret; 555 int ret;
556 556
557 cell = PDE(inode)->data; 557 cell = PDE_DATA(inode);
558 if (!cell) 558 if (!cell)
559 return -ENOENT; 559 return -ENOENT;
560 560
@@ -659,7 +659,7 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file)
659 struct seq_file *m; 659 struct seq_file *m;
660 int ret; 660 int ret;
661 661
662 cell = PDE(inode)->data; 662 cell = PDE_DATA(inode);
663 if (!cell) 663 if (!cell)
664 return -ENOENT; 664 return -ENOENT;
665 665
diff --git a/fs/aio.c b/fs/aio.c
index 6db8745c2edd..351afe7ac78e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1324,6 +1324,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
1324 if (iocb->ki_pos < 0) 1324 if (iocb->ki_pos < 0)
1325 return -EINVAL; 1325 return -EINVAL;
1326 1326
1327 if (opcode == IOCB_CMD_PWRITEV)
1328 file_start_write(file);
1327 do { 1329 do {
1328 ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], 1330 ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
1329 iocb->ki_nr_segs - iocb->ki_cur_seg, 1331 iocb->ki_nr_segs - iocb->ki_cur_seg,
@@ -1336,6 +1338,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
1336 } while (ret > 0 && iocb->ki_left > 0 && 1338 } while (ret > 0 && iocb->ki_left > 0 &&
1337 (opcode == IOCB_CMD_PWRITEV || 1339 (opcode == IOCB_CMD_PWRITEV ||
1338 (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); 1340 (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));
1341 if (opcode == IOCB_CMD_PWRITEV)
1342 file_end_write(file);
1339 1343
1340 /* This means we must have transferred all that we could */ 1344 /* This means we must have transferred all that we could */
1341 /* No need to retry anymore */ 1345 /* No need to retry anymore */
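The aio hunk above, like the cachefiles, coda and coredump hunks further down, brackets writes that bypass vfs_write() with file_start_write()/file_end_write(), taking over the sb_start_write()/sb_end_write() pairs being removed from individual ->aio_write implementations (btrfs, cifs, fuse). A minimal sketch of the pairing as a hypothetical helper, modelled loosely on the cachefiles call site; the name and the set_fs() detour for a kernel buffer are illustrative only:

#include <linux/fs.h>
#include <linux/uaccess.h>

/* Hypothetical: write a kernel buffer through ->write under freeze protection. */
static ssize_t demo_write_locked(struct file *file, const char *buf,
				 size_t len, loff_t *pos)
{
	mm_segment_t old_fs;
	ssize_t ret;

	file_start_write(file);		/* blocks if the filesystem is being frozen */
	old_fs = get_fs();
	set_fs(KERNEL_DS);
	ret = file->f_op->write(file, (const char __user *)buf, len, pos);
	set_fs(old_fs);
	file_end_write(file);		/* releases freeze protection */

	return ret;
}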
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 02fe378fc506..bce87694f7b0 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -286,15 +286,12 @@ static int load_aout_binary(struct linux_binprm * bprm)
286 return error; 286 return error;
287 } 287 }
288 288
289 error = bprm->file->f_op->read(bprm->file, 289 error = read_code(bprm->file, text_addr, pos,
290 (char __user *)text_addr, 290 ex.a_text+ex.a_data);
291 ex.a_text+ex.a_data, &pos);
292 if ((signed long)error < 0) { 291 if ((signed long)error < 0) {
293 send_sig(SIGKILL, current, 0); 292 send_sig(SIGKILL, current, 0);
294 return error; 293 return error;
295 } 294 }
296
297 flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data);
298 } else { 295 } else {
299 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && 296 if ((ex.a_text & 0xfff || ex.a_data & 0xfff) &&
300 (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) 297 (N_MAGIC(ex) != NMAGIC) && printk_ratelimit())
@@ -310,14 +307,9 @@ static int load_aout_binary(struct linux_binprm * bprm)
310 } 307 }
311 308
312 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { 309 if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) {
313 loff_t pos = fd_offset;
314 vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); 310 vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data);
315 bprm->file->f_op->read(bprm->file, 311 read_code(bprm->file, N_TXTADDR(ex), fd_offset,
316 (char __user *)N_TXTADDR(ex), 312 ex.a_text + ex.a_data);
317 ex.a_text+ex.a_data, &pos);
318 flush_icache_range((unsigned long) N_TXTADDR(ex),
319 (unsigned long) N_TXTADDR(ex) +
320 ex.a_text+ex.a_data);
321 goto beyond_if; 313 goto beyond_if;
322 } 314 }
323 315
@@ -396,8 +388,6 @@ static int load_aout_library(struct file *file)
396 start_addr = ex.a_entry & 0xfffff000; 388 start_addr = ex.a_entry & 0xfffff000;
397 389
398 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { 390 if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) {
399 loff_t pos = N_TXTOFF(ex);
400
401 if (printk_ratelimit()) 391 if (printk_ratelimit())
402 { 392 {
403 printk(KERN_WARNING 393 printk(KERN_WARNING
@@ -406,11 +396,8 @@ static int load_aout_library(struct file *file)
406 } 396 }
407 vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); 397 vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss);
408 398
409 file->f_op->read(file, (char __user *)start_addr, 399 read_code(file, start_addr, N_TXTOFF(ex),
410 ex.a_text + ex.a_data, &pos); 400 ex.a_text + ex.a_data);
411 flush_icache_range((unsigned long) start_addr,
412 (unsigned long) start_addr + ex.a_text + ex.a_data);
413
414 retval = 0; 401 retval = 0;
415 goto out; 402 goto out;
416 } 403 }
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index c1cc06aed601..9dac212fc6f9 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -926,7 +926,6 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
926 struct elf32_fdpic_loadseg *seg; 926 struct elf32_fdpic_loadseg *seg;
927 struct elf32_phdr *phdr; 927 struct elf32_phdr *phdr;
928 unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags; 928 unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags;
929 loff_t fpos;
930 int loop, ret; 929 int loop, ret;
931 930
932 load_addr = params->load_addr; 931 load_addr = params->load_addr;
@@ -964,14 +963,12 @@ static int elf_fdpic_map_file_constdisp_on_uclinux(
964 if (params->phdrs[loop].p_type != PT_LOAD) 963 if (params->phdrs[loop].p_type != PT_LOAD)
965 continue; 964 continue;
966 965
967 fpos = phdr->p_offset;
968
969 seg->addr = maddr + (phdr->p_vaddr - base); 966 seg->addr = maddr + (phdr->p_vaddr - base);
970 seg->p_vaddr = phdr->p_vaddr; 967 seg->p_vaddr = phdr->p_vaddr;
971 seg->p_memsz = phdr->p_memsz; 968 seg->p_memsz = phdr->p_memsz;
972 969
973 ret = file->f_op->read(file, (void *) seg->addr, 970 ret = read_code(file, seg->addr, phdr->p_offset,
974 phdr->p_filesz, &fpos); 971 phdr->p_filesz);
975 if (ret < 0) 972 if (ret < 0)
976 return ret; 973 return ret;
977 974
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 2036d21baaef..d50bbe59da1e 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -207,11 +207,12 @@ static int decompress_exec(
207 207
208 /* Read in first chunk of data and parse gzip header. */ 208 /* Read in first chunk of data and parse gzip header. */
209 fpos = offset; 209 fpos = offset;
210 ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); 210 ret = kernel_read(bprm->file, offset, buf, LBUFSIZE);
211 211
212 strm.next_in = buf; 212 strm.next_in = buf;
213 strm.avail_in = ret; 213 strm.avail_in = ret;
214 strm.total_in = 0; 214 strm.total_in = 0;
215 fpos += ret;
215 216
216 retval = -ENOEXEC; 217 retval = -ENOEXEC;
217 218
@@ -277,7 +278,7 @@ static int decompress_exec(
277 } 278 }
278 279
279 while ((ret = zlib_inflate(&strm, Z_NO_FLUSH)) == Z_OK) { 280 while ((ret = zlib_inflate(&strm, Z_NO_FLUSH)) == Z_OK) {
280 ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); 281 ret = kernel_read(bprm->file, fpos, buf, LBUFSIZE);
281 if (ret <= 0) 282 if (ret <= 0)
282 break; 283 break;
283 len -= ret; 284 len -= ret;
@@ -285,6 +286,7 @@ static int decompress_exec(
285 strm.next_in = buf; 286 strm.next_in = buf;
286 strm.avail_in = ret; 287 strm.avail_in = ret;
287 strm.total_in = 0; 288 strm.total_in = 0;
289 fpos += ret;
288 } 290 }
289 291
290 if (ret < 0) { 292 if (ret < 0) {
@@ -428,6 +430,7 @@ static int load_flat_file(struct linux_binprm * bprm,
428 unsigned long textpos = 0, datapos = 0, result; 430 unsigned long textpos = 0, datapos = 0, result;
429 unsigned long realdatastart = 0; 431 unsigned long realdatastart = 0;
430 unsigned long text_len, data_len, bss_len, stack_len, flags; 432 unsigned long text_len, data_len, bss_len, stack_len, flags;
433 unsigned long full_data;
431 unsigned long len, memp = 0; 434 unsigned long len, memp = 0;
432 unsigned long memp_size, extra, rlim; 435 unsigned long memp_size, extra, rlim;
433 unsigned long *reloc = 0, *rp; 436 unsigned long *reloc = 0, *rp;
@@ -451,6 +454,7 @@ static int load_flat_file(struct linux_binprm * bprm,
451 relocs = ntohl(hdr->reloc_count); 454 relocs = ntohl(hdr->reloc_count);
452 flags = ntohl(hdr->flags); 455 flags = ntohl(hdr->flags);
453 rev = ntohl(hdr->rev); 456 rev = ntohl(hdr->rev);
457 full_data = data_len + relocs * sizeof(unsigned long);
454 458
455 if (strncmp(hdr->magic, "bFLT", 4)) { 459 if (strncmp(hdr->magic, "bFLT", 4)) {
456 /* 460 /*
@@ -577,12 +581,12 @@ static int load_flat_file(struct linux_binprm * bprm,
577#ifdef CONFIG_BINFMT_ZFLAT 581#ifdef CONFIG_BINFMT_ZFLAT
578 if (flags & FLAT_FLAG_GZDATA) { 582 if (flags & FLAT_FLAG_GZDATA) {
579 result = decompress_exec(bprm, fpos, (char *) datapos, 583 result = decompress_exec(bprm, fpos, (char *) datapos,
580 data_len + (relocs * sizeof(unsigned long)), 0); 584 full_data, 0);
581 } else 585 } else
582#endif 586#endif
583 { 587 {
584 result = bprm->file->f_op->read(bprm->file, (char *) datapos, 588 result = read_code(bprm->file, datapos, fpos,
585 data_len + (relocs * sizeof(unsigned long)), &fpos); 589 full_data);
586 } 590 }
587 if (IS_ERR_VALUE(result)) { 591 if (IS_ERR_VALUE(result)) {
588 printk("Unable to read data+bss, errno %d\n", (int)-result); 592 printk("Unable to read data+bss, errno %d\n", (int)-result);
@@ -627,30 +631,25 @@ static int load_flat_file(struct linux_binprm * bprm,
627 if (flags & FLAT_FLAG_GZIP) { 631 if (flags & FLAT_FLAG_GZIP) {
628 result = decompress_exec(bprm, sizeof (struct flat_hdr), 632 result = decompress_exec(bprm, sizeof (struct flat_hdr),
629 (((char *) textpos) + sizeof (struct flat_hdr)), 633 (((char *) textpos) + sizeof (struct flat_hdr)),
630 (text_len + data_len + (relocs * sizeof(unsigned long)) 634 (text_len + full_data
631 - sizeof (struct flat_hdr)), 635 - sizeof (struct flat_hdr)),
632 0); 636 0);
633 memmove((void *) datapos, (void *) realdatastart, 637 memmove((void *) datapos, (void *) realdatastart,
634 data_len + (relocs * sizeof(unsigned long))); 638 full_data);
635 } else if (flags & FLAT_FLAG_GZDATA) { 639 } else if (flags & FLAT_FLAG_GZDATA) {
636 fpos = 0; 640 result = read_code(bprm->file, textpos, 0, text_len);
637 result = bprm->file->f_op->read(bprm->file,
638 (char *) textpos, text_len, &fpos);
639 if (!IS_ERR_VALUE(result)) 641 if (!IS_ERR_VALUE(result))
640 result = decompress_exec(bprm, text_len, (char *) datapos, 642 result = decompress_exec(bprm, text_len, (char *) datapos,
641 data_len + (relocs * sizeof(unsigned long)), 0); 643 full_data, 0);
642 } 644 }
643 else 645 else
644#endif 646#endif
645 { 647 {
646 fpos = 0; 648 result = read_code(bprm->file, textpos, 0, text_len);
647 result = bprm->file->f_op->read(bprm->file, 649 if (!IS_ERR_VALUE(result))
648 (char *) textpos, text_len, &fpos); 650 result = read_code(bprm->file, datapos,
649 if (!IS_ERR_VALUE(result)) { 651 ntohl(hdr->data_start),
650 fpos = ntohl(hdr->data_start); 652 full_data);
651 result = bprm->file->f_op->read(bprm->file, (char *) datapos,
652 data_len + (relocs * sizeof(unsigned long)), &fpos);
653 }
654 } 653 }
655 if (IS_ERR_VALUE(result)) { 654 if (IS_ERR_VALUE(result)) {
656 printk("Unable to read code+data+bss, errno %d\n",(int)-result); 655 printk("Unable to read code+data+bss, errno %d\n",(int)-result);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index ade03e6f7bd2..bb8b7a0e28a6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1514,8 +1514,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1514 size_t count, ocount; 1514 size_t count, ocount;
1515 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); 1515 bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
1516 1516
1517 sb_start_write(inode->i_sb);
1518
1519 mutex_lock(&inode->i_mutex); 1517 mutex_lock(&inode->i_mutex);
1520 1518
1521 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); 1519 err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
@@ -1617,7 +1615,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1617 if (sync) 1615 if (sync)
1618 atomic_dec(&BTRFS_I(inode)->sync_writers); 1616 atomic_dec(&BTRFS_I(inode)->sync_writers);
1619out: 1617out:
1620 sb_end_write(inode->i_sb);
1621 current->backing_dev_info = NULL; 1618 current->backing_dev_info = NULL;
1622 return num_written ? num_written : err; 1619 return num_written ? num_written : err;
1623} 1620}
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 480992259707..317f9ee9c991 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -962,12 +962,14 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
962 } 962 }
963 963
964 data = kmap(page); 964 data = kmap(page);
965 file_start_write(file);
965 old_fs = get_fs(); 966 old_fs = get_fs();
966 set_fs(KERNEL_DS); 967 set_fs(KERNEL_DS);
967 ret = file->f_op->write( 968 ret = file->f_op->write(
968 file, (const void __user *) data, len, &pos); 969 file, (const void __user *) data, len, &pos);
969 set_fs(old_fs); 970 set_fs(old_fs);
970 kunmap(page); 971 kunmap(page);
972 file_end_write(file);
971 if (ret != len) 973 if (ret != len)
972 ret = -EIO; 974 ret = -EIO;
973 } 975 }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7a0dd99e4507..2d4a231dd70b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2520,8 +2520,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2520 2520
2521 BUG_ON(iocb->ki_pos != pos); 2521 BUG_ON(iocb->ki_pos != pos);
2522 2522
2523 sb_start_write(inode->i_sb);
2524
2525 /* 2523 /*
2526 * We need to hold the sem to be sure nobody modifies lock list 2524 * We need to hold the sem to be sure nobody modifies lock list
2527 * with a brlock that prevents writing. 2525 * with a brlock that prevents writing.
@@ -2545,7 +2543,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2545 } 2543 }
2546 2544
2547 up_read(&cinode->lock_sem); 2545 up_read(&cinode->lock_sem);
2548 sb_end_write(inode->i_sb);
2549 return rc; 2546 return rc;
2550} 2547}
2551 2548
diff --git a/fs/coda/file.c b/fs/coda/file.c
index fa4c100bdc7d..380b798f8443 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -79,6 +79,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
79 return -EINVAL; 79 return -EINVAL;
80 80
81 host_inode = file_inode(host_file); 81 host_inode = file_inode(host_file);
82 file_start_write(host_file);
82 mutex_lock(&coda_inode->i_mutex); 83 mutex_lock(&coda_inode->i_mutex);
83 84
84 ret = host_file->f_op->write(host_file, buf, count, ppos); 85 ret = host_file->f_op->write(host_file, buf, count, ppos);
@@ -87,6 +88,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo
87 coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; 88 coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
88 coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; 89 coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC;
89 mutex_unlock(&coda_inode->i_mutex); 90 mutex_unlock(&coda_inode->i_mutex);
91 file_end_write(host_file);
90 92
91 return ret; 93 return ret;
92} 94}
diff --git a/fs/compat.c b/fs/compat.c
index 5f83ffa42115..d0560c93973d 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1068,190 +1068,6 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
1068} 1068}
1069#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ 1069#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */
1070 1070
1071static ssize_t compat_do_readv_writev(int type, struct file *file,
1072 const struct compat_iovec __user *uvector,
1073 unsigned long nr_segs, loff_t *pos)
1074{
1075 compat_ssize_t tot_len;
1076 struct iovec iovstack[UIO_FASTIOV];
1077 struct iovec *iov = iovstack;
1078 ssize_t ret;
1079 io_fn_t fn;
1080 iov_fn_t fnv;
1081
1082 ret = -EINVAL;
1083 if (!file->f_op)
1084 goto out;
1085
1086 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
1087 UIO_FASTIOV, iovstack, &iov);
1088 if (ret <= 0)
1089 goto out;
1090
1091 tot_len = ret;
1092 ret = rw_verify_area(type, file, pos, tot_len);
1093 if (ret < 0)
1094 goto out;
1095
1096 fnv = NULL;
1097 if (type == READ) {
1098 fn = file->f_op->read;
1099 fnv = file->f_op->aio_read;
1100 } else {
1101 fn = (io_fn_t)file->f_op->write;
1102 fnv = file->f_op->aio_write;
1103 }
1104
1105 if (fnv)
1106 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
1107 pos, fnv);
1108 else
1109 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
1110
1111out:
1112 if (iov != iovstack)
1113 kfree(iov);
1114 if ((ret + (type == READ)) > 0) {
1115 if (type == READ)
1116 fsnotify_access(file);
1117 else
1118 fsnotify_modify(file);
1119 }
1120 return ret;
1121}
1122
1123static size_t compat_readv(struct file *file,
1124 const struct compat_iovec __user *vec,
1125 unsigned long vlen, loff_t *pos)
1126{
1127 ssize_t ret = -EBADF;
1128
1129 if (!(file->f_mode & FMODE_READ))
1130 goto out;
1131
1132 ret = -EINVAL;
1133 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
1134 goto out;
1135
1136 ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
1137
1138out:
1139 if (ret > 0)
1140 add_rchar(current, ret);
1141 inc_syscr(current);
1142 return ret;
1143}
1144
1145asmlinkage ssize_t
1146compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
1147 unsigned long vlen)
1148{
1149 struct fd f = fdget(fd);
1150 ssize_t ret;
1151 loff_t pos;
1152
1153 if (!f.file)
1154 return -EBADF;
1155 pos = f.file->f_pos;
1156 ret = compat_readv(f.file, vec, vlen, &pos);
1157 f.file->f_pos = pos;
1158 fdput(f);
1159 return ret;
1160}
1161
1162asmlinkage ssize_t
1163compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec,
1164 unsigned long vlen, loff_t pos)
1165{
1166 struct fd f;
1167 ssize_t ret;
1168
1169 if (pos < 0)
1170 return -EINVAL;
1171 f = fdget(fd);
1172 if (!f.file)
1173 return -EBADF;
1174 ret = -ESPIPE;
1175 if (f.file->f_mode & FMODE_PREAD)
1176 ret = compat_readv(f.file, vec, vlen, &pos);
1177 fdput(f);
1178 return ret;
1179}
1180
1181asmlinkage ssize_t
1182compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec,
1183 unsigned long vlen, u32 pos_low, u32 pos_high)
1184{
1185 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1186 return compat_sys_preadv64(fd, vec, vlen, pos);
1187}
1188
1189static size_t compat_writev(struct file *file,
1190 const struct compat_iovec __user *vec,
1191 unsigned long vlen, loff_t *pos)
1192{
1193 ssize_t ret = -EBADF;
1194
1195 if (!(file->f_mode & FMODE_WRITE))
1196 goto out;
1197
1198 ret = -EINVAL;
1199 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
1200 goto out;
1201
1202 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
1203
1204out:
1205 if (ret > 0)
1206 add_wchar(current, ret);
1207 inc_syscw(current);
1208 return ret;
1209}
1210
1211asmlinkage ssize_t
1212compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
1213 unsigned long vlen)
1214{
1215 struct fd f = fdget(fd);
1216 ssize_t ret;
1217 loff_t pos;
1218
1219 if (!f.file)
1220 return -EBADF;
1221 pos = f.file->f_pos;
1222 ret = compat_writev(f.file, vec, vlen, &pos);
1223 f.file->f_pos = pos;
1224 fdput(f);
1225 return ret;
1226}
1227
1228asmlinkage ssize_t
1229compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec,
1230 unsigned long vlen, loff_t pos)
1231{
1232 struct fd f;
1233 ssize_t ret;
1234
1235 if (pos < 0)
1236 return -EINVAL;
1237 f = fdget(fd);
1238 if (!f.file)
1239 return -EBADF;
1240 ret = -ESPIPE;
1241 if (f.file->f_mode & FMODE_PWRITE)
1242 ret = compat_writev(f.file, vec, vlen, &pos);
1243 fdput(f);
1244 return ret;
1245}
1246
1247asmlinkage ssize_t
1248compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec,
1249 unsigned long vlen, u32 pos_low, u32 pos_high)
1250{
1251 loff_t pos = ((loff_t)pos_high << 32) | pos_low;
1252 return compat_sys_pwritev64(fd, vec, vlen, pos);
1253}
1254
1255/* 1071/*
1256 * Exactly like fs/open.c:sys_open(), except that it doesn't set the 1072 * Exactly like fs/open.c:sys_open(), except that it doesn't set the
1257 * O_LARGEFILE flag. 1073 * O_LARGEFILE flag.
diff --git a/fs/coredump.c b/fs/coredump.c
index ec306cc9a28a..a9abe313e8d5 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -432,9 +432,7 @@ static bool dump_interrupted(void)
432 432
433static void wait_for_dump_helpers(struct file *file) 433static void wait_for_dump_helpers(struct file *file)
434{ 434{
435 struct pipe_inode_info *pipe; 435 struct pipe_inode_info *pipe = file->private_data;
436
437 pipe = file_inode(file)->i_pipe;
438 436
439 pipe_lock(pipe); 437 pipe_lock(pipe);
440 pipe->readers++; 438 pipe->readers++;
@@ -656,7 +654,9 @@ void do_coredump(siginfo_t *siginfo)
656 goto close_fail; 654 goto close_fail;
657 if (displaced) 655 if (displaced)
658 put_files_struct(displaced); 656 put_files_struct(displaced);
657 file_start_write(cprm.file);
659 core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm); 658 core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm);
659 file_end_write(cprm.file);
660 660
661 if (ispipe && core_pipe_limit) 661 if (ispipe && core_pipe_limit)
662 wait_for_dump_helpers(cprm.file); 662 wait_for_dump_helpers(cprm.file);
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index ede07fc7309f..bfb531564319 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -9,6 +9,7 @@
9 9
10#include <linux/efi.h> 10#include <linux/efi.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/slab.h>
12 13
13#include "internal.h" 14#include "internal.h"
14 15
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index 640e289d522e..7e787fb90293 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -10,6 +10,7 @@
10#include <linux/efi.h> 10#include <linux/efi.h>
11#include <linux/fs.h> 11#include <linux/fs.h>
12#include <linux/ctype.h> 12#include <linux/ctype.h>
13#include <linux/slab.h>
13 14
14#include "internal.h" 15#include "internal.h"
15 16
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index 525a2a1ac16c..141aee31884f 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -13,6 +13,8 @@
13#include <linux/module.h> 13#include <linux/module.h>
14#include <linux/pagemap.h> 14#include <linux/pagemap.h>
15#include <linux/ucs2_string.h> 15#include <linux/ucs2_string.h>
16#include <linux/slab.h>
17#include <linux/magic.h>
16 18
17#include "internal.h" 19#include "internal.h"
18 20
diff --git a/fs/exec.c b/fs/exec.c
index 963f510a25ab..643019585574 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -802,6 +802,15 @@ int kernel_read(struct file *file, loff_t offset,
802 802
803EXPORT_SYMBOL(kernel_read); 803EXPORT_SYMBOL(kernel_read);
804 804
805ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
806{
807 ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos);
808 if (res > 0)
809 flush_icache_range(addr, addr + len);
810 return res;
811}
812EXPORT_SYMBOL(read_code);
813
805static int exec_mmap(struct mm_struct *mm) 814static int exec_mmap(struct mm_struct *mm)
806{ 815{
807 struct task_struct *tsk; 816 struct task_struct *tsk;
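The read_code() helper added above is what binfmt_aout, binfmt_flat and binfmt_elf_fdpic switch to in this series: one call reads into a task virtual address and flushes the instruction cache over that range, replacing the open-coded f_op->read() + flush_icache_range() pairs. An illustrative fragment of how a loader would use it; the function name, the text_* variables and the minimal error handling are placeholders, not lifted from any in-tree loader:

#include <linux/binfmts.h>
#include <linux/fs.h>
#include <linux/mm.h>

/* Hypothetical: reserve an address range and pull the text segment in. */
static int demo_load_text(struct linux_binprm *bprm, unsigned long text_addr,
			  loff_t text_off, size_t text_len)
{
	ssize_t copied;

	vm_brk(text_addr, text_len);	/* set up the anonymous mapping */

	/* read_code() flushes the icache over [text_addr, text_addr + text_len) on success */
	copied = read_code(bprm->file, text_addr, text_off, text_len);
	if (copied < 0)
		return copied;
	return 0;
}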
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a11ea4d6164c..b1ed9e07434b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2260,7 +2260,7 @@ static const struct seq_operations ext4_mb_seq_groups_ops = {
2260 2260
2261static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) 2261static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2262{ 2262{
2263 struct super_block *sb = PDE(inode)->data; 2263 struct super_block *sb = PDE_DATA(inode);
2264 int rc; 2264 int rc;
2265 2265
2266 rc = seq_open(file, &ext4_mb_seq_groups_ops); 2266 rc = seq_open(file, &ext4_mb_seq_groups_ops);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dbc7c090c13a..24a146bde742 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1806,7 +1806,7 @@ static int options_seq_show(struct seq_file *seq, void *offset)
1806 1806
1807static int options_open_fs(struct inode *inode, struct file *file) 1807static int options_open_fs(struct inode *inode, struct file *file)
1808{ 1808{
1809 return single_open(file, options_seq_show, PDE(inode)->data); 1809 return single_open(file, options_seq_show, PDE_DATA(inode));
1810} 1810}
1811 1811
1812static const struct file_operations ext4_seq_options_fops = { 1812static const struct file_operations ext4_seq_options_fops = {
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 137af4255da6..44abc2f286e0 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -299,7 +299,7 @@ int f2fs_acl_chmod(struct inode *inode)
299 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); 299 struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
300 struct posix_acl *acl; 300 struct posix_acl *acl;
301 int error; 301 int error;
302 mode_t mode = get_inode_mode(inode); 302 umode_t mode = get_inode_mode(inode);
303 303
304 if (!test_opt(sbi, POSIX_ACL)) 304 if (!test_opt(sbi, POSIX_ACL))
305 return 0; 305 return 0;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a1f38443ecee..1be948768e2f 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -60,7 +60,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = {
60 60
61static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) 61static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode)
62{ 62{
63 mode_t mode = inode->i_mode; 63 umode_t mode = inode->i_mode;
64 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; 64 de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT];
65} 65}
66 66
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 958a46da19ae..db626282d424 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -590,7 +590,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
590 { 590 {
591 unsigned int oldflags; 591 unsigned int oldflags;
592 592
593 ret = mnt_want_write(filp->f_path.mnt); 593 ret = mnt_want_write_file(filp);
594 if (ret) 594 if (ret)
595 return ret; 595 return ret;
596 596
@@ -627,7 +627,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
627 inode->i_ctime = CURRENT_TIME; 627 inode->i_ctime = CURRENT_TIME;
628 mark_inode_dirty(inode); 628 mark_inode_dirty(inode);
629out: 629out:
630 mnt_drop_write(filp->f_path.mnt); 630 mnt_drop_write_file(filp);
631 return ret; 631 return ret;
632 } 632 }
633 default: 633 default:
diff --git a/fs/fifo.c b/fs/fifo.c
deleted file mode 100644
index cf6f4345ceb0..000000000000
--- a/fs/fifo.c
+++ /dev/null
@@ -1,153 +0,0 @@
1/*
2 * linux/fs/fifo.c
3 *
4 * written by Paul H. Hargrove
5 *
6 * Fixes:
7 * 10-06-1999, AV: fixed OOM handling in fifo_open(), moved
8 * initialization there, switched to external
9 * allocation of pipe_inode_info.
10 */
11
12#include <linux/mm.h>
13#include <linux/fs.h>
14#include <linux/sched.h>
15#include <linux/pipe_fs_i.h>
16
17static int wait_for_partner(struct inode* inode, unsigned int *cnt)
18{
19 int cur = *cnt;
20
21 while (cur == *cnt) {
22 pipe_wait(inode->i_pipe);
23 if (signal_pending(current))
24 break;
25 }
26 return cur == *cnt ? -ERESTARTSYS : 0;
27}
28
29static void wake_up_partner(struct inode* inode)
30{
31 wake_up_interruptible(&inode->i_pipe->wait);
32}
33
34static int fifo_open(struct inode *inode, struct file *filp)
35{
36 struct pipe_inode_info *pipe;
37 int ret;
38
39 mutex_lock(&inode->i_mutex);
40 pipe = inode->i_pipe;
41 if (!pipe) {
42 ret = -ENOMEM;
43 pipe = alloc_pipe_info(inode);
44 if (!pipe)
45 goto err_nocleanup;
46 inode->i_pipe = pipe;
47 }
48 filp->f_version = 0;
49
50 /* We can only do regular read/write on fifos */
51 filp->f_mode &= (FMODE_READ | FMODE_WRITE);
52
53 switch (filp->f_mode) {
54 case FMODE_READ:
55 /*
56 * O_RDONLY
57 * POSIX.1 says that O_NONBLOCK means return with the FIFO
58 * opened, even when there is no process writing the FIFO.
59 */
60 filp->f_op = &read_pipefifo_fops;
61 pipe->r_counter++;
62 if (pipe->readers++ == 0)
63 wake_up_partner(inode);
64
65 if (!pipe->writers) {
66 if ((filp->f_flags & O_NONBLOCK)) {
67 /* suppress POLLHUP until we have
68 * seen a writer */
69 filp->f_version = pipe->w_counter;
70 } else {
71 if (wait_for_partner(inode, &pipe->w_counter))
72 goto err_rd;
73 }
74 }
75 break;
76
77 case FMODE_WRITE:
78 /*
79 * O_WRONLY
80 * POSIX.1 says that O_NONBLOCK means return -1 with
81 * errno=ENXIO when there is no process reading the FIFO.
82 */
83 ret = -ENXIO;
84 if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
85 goto err;
86
87 filp->f_op = &write_pipefifo_fops;
88 pipe->w_counter++;
89 if (!pipe->writers++)
90 wake_up_partner(inode);
91
92 if (!pipe->readers) {
93 if (wait_for_partner(inode, &pipe->r_counter))
94 goto err_wr;
95 }
96 break;
97
98 case FMODE_READ | FMODE_WRITE:
99 /*
100 * O_RDWR
101 * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
102 * This implementation will NEVER block on a O_RDWR open, since
103 * the process can at least talk to itself.
104 */
105 filp->f_op = &rdwr_pipefifo_fops;
106
107 pipe->readers++;
108 pipe->writers++;
109 pipe->r_counter++;
110 pipe->w_counter++;
111 if (pipe->readers == 1 || pipe->writers == 1)
112 wake_up_partner(inode);
113 break;
114
115 default:
116 ret = -EINVAL;
117 goto err;
118 }
119
120 /* Ok! */
121 mutex_unlock(&inode->i_mutex);
122 return 0;
123
124err_rd:
125 if (!--pipe->readers)
126 wake_up_interruptible(&pipe->wait);
127 ret = -ERESTARTSYS;
128 goto err;
129
130err_wr:
131 if (!--pipe->writers)
132 wake_up_interruptible(&pipe->wait);
133 ret = -ERESTARTSYS;
134 goto err;
135
136err:
137 if (!pipe->readers && !pipe->writers)
138 free_pipe_info(inode);
139
140err_nocleanup:
141 mutex_unlock(&inode->i_mutex);
142 return ret;
143}
144
145/*
146 * Dummy default file-operations: the only thing this does
147 * is contain the open that then fills in the correct operations
148 * depending on the access mode of the file...
149 */
150const struct file_operations def_fifo_fops = {
151 .open = fifo_open, /* will set read_ or write_pipefifo_fops */
152 .llseek = noop_llseek,
153};
diff --git a/fs/file.c b/fs/file.c
index 3906d9577a18..4a78f981557a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -23,24 +23,10 @@
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/workqueue.h> 24#include <linux/workqueue.h>
25 25
26struct fdtable_defer {
27 spinlock_t lock;
28 struct work_struct wq;
29 struct fdtable *next;
30};
31
32int sysctl_nr_open __read_mostly = 1024*1024; 26int sysctl_nr_open __read_mostly = 1024*1024;
33int sysctl_nr_open_min = BITS_PER_LONG; 27int sysctl_nr_open_min = BITS_PER_LONG;
34int sysctl_nr_open_max = 1024 * 1024; /* raised later */ 28int sysctl_nr_open_max = 1024 * 1024; /* raised later */
35 29
36/*
37 * We use this list to defer free fdtables that have vmalloced
38 * sets/arrays. By keeping a per-cpu list, we avoid having to embed
39 * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
40 * this per-task structure.
41 */
42static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
43
44static void *alloc_fdmem(size_t size) 30static void *alloc_fdmem(size_t size)
45{ 31{
46 /* 32 /*
@@ -67,46 +53,9 @@ static void __free_fdtable(struct fdtable *fdt)
67 kfree(fdt); 53 kfree(fdt);
68} 54}
69 55
70static void free_fdtable_work(struct work_struct *work)
71{
72 struct fdtable_defer *f =
73 container_of(work, struct fdtable_defer, wq);
74 struct fdtable *fdt;
75
76 spin_lock_bh(&f->lock);
77 fdt = f->next;
78 f->next = NULL;
79 spin_unlock_bh(&f->lock);
80 while(fdt) {
81 struct fdtable *next = fdt->next;
82
83 __free_fdtable(fdt);
84 fdt = next;
85 }
86}
87
88static void free_fdtable_rcu(struct rcu_head *rcu) 56static void free_fdtable_rcu(struct rcu_head *rcu)
89{ 57{
90 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); 58 __free_fdtable(container_of(rcu, struct fdtable, rcu));
91 struct fdtable_defer *fddef;
92
93 BUG_ON(!fdt);
94 BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
95
96 if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
97 kfree(fdt->fd);
98 kfree(fdt->open_fds);
99 kfree(fdt);
100 } else {
101 fddef = &get_cpu_var(fdtable_defer_list);
102 spin_lock(&fddef->lock);
103 fdt->next = fddef->next;
104 fddef->next = fdt;
105 /* vmallocs are handled from the workqueue context */
106 schedule_work(&fddef->wq);
107 spin_unlock(&fddef->lock);
108 put_cpu_var(fdtable_defer_list);
109 }
110} 59}
111 60
112/* 61/*
@@ -174,7 +123,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
174 fdt->open_fds = data; 123 fdt->open_fds = data;
175 data += nr / BITS_PER_BYTE; 124 data += nr / BITS_PER_BYTE;
176 fdt->close_on_exec = data; 125 fdt->close_on_exec = data;
177 fdt->next = NULL;
178 126
179 return fdt; 127 return fdt;
180 128
@@ -221,7 +169,7 @@ static int expand_fdtable(struct files_struct *files, int nr)
221 /* Continue as planned */ 169 /* Continue as planned */
222 copy_fdtable(new_fdt, cur_fdt); 170 copy_fdtable(new_fdt, cur_fdt);
223 rcu_assign_pointer(files->fdt, new_fdt); 171 rcu_assign_pointer(files->fdt, new_fdt);
224 if (cur_fdt->max_fds > NR_OPEN_DEFAULT) 172 if (cur_fdt != &files->fdtab)
225 call_rcu(&cur_fdt->rcu, free_fdtable_rcu); 173 call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
226 } else { 174 } else {
227 /* Somebody else expanded, so undo our attempt */ 175 /* Somebody else expanded, so undo our attempt */
@@ -316,7 +264,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
316 new_fdt->close_on_exec = newf->close_on_exec_init; 264 new_fdt->close_on_exec = newf->close_on_exec_init;
317 new_fdt->open_fds = newf->open_fds_init; 265 new_fdt->open_fds = newf->open_fds_init;
318 new_fdt->fd = &newf->fd_array[0]; 266 new_fdt->fd = &newf->fd_array[0];
319 new_fdt->next = NULL;
320 267
321 spin_lock(&oldf->file_lock); 268 spin_lock(&oldf->file_lock);
322 old_fdt = files_fdtable(oldf); 269 old_fdt = files_fdtable(oldf);
@@ -490,19 +437,8 @@ void exit_files(struct task_struct *tsk)
490 } 437 }
491} 438}
492 439
493static void fdtable_defer_list_init(int cpu)
494{
495 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
496 spin_lock_init(&fddef->lock);
497 INIT_WORK(&fddef->wq, free_fdtable_work);
498 fddef->next = NULL;
499}
500
501void __init files_defer_init(void) 440void __init files_defer_init(void)
502{ 441{
503 int i;
504 for_each_possible_cpu(i)
505 fdtable_defer_list_init(i);
506 sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & 442 sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
507 -BITS_PER_LONG; 443 -BITS_PER_LONG;
508} 444}
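The fs/file.c hunk above drops the per-CPU fdtable_defer machinery: it existed because vfree() could not be called from the softirq context an RCU callback runs in, and with vfree() now handling that internally, free_fdtable_rcu() can free the table directly. A generic sketch of the resulting call_rcu() free pattern, with hypothetical names and a mixed kmalloc/vmalloc allocation like alloc_fdmem() above:

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

struct demo_table {
	void *slots;			/* kmalloc'ed or vmalloc'ed, as in alloc_fdmem() */
	struct rcu_head rcu;
};

static void demo_table_free_rcu(struct rcu_head *rcu)
{
	struct demo_table *t = container_of(rcu, struct demo_table, rcu);

	/* runs in softirq context after a grace period */
	if (is_vmalloc_addr(t->slots))
		vfree(t->slots);
	else
		kfree(t->slots);
	kfree(t);
}

static void demo_table_release(struct demo_table *t)
{
	/* readers may still walk the old table under rcu_read_lock() */
	call_rcu(&t->rcu, demo_table_free_rcu);
}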
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 11dfa0c3fb46..9bfd1a3214e6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1319,7 +1319,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
1319 page_nr++; 1319 page_nr++;
1320 ret += buf->len; 1320 ret += buf->len;
1321 1321
1322 if (pipe->inode) 1322 if (pipe->files)
1323 do_wakeup = 1; 1323 do_wakeup = 1;
1324 } 1324 }
1325 1325
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 34b80ba95bad..d15c6f21c17f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -971,7 +971,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
971 return err; 971 return err;
972 972
973 count = ocount; 973 count = ocount;
974 sb_start_write(inode->i_sb);
975 mutex_lock(&inode->i_mutex); 974 mutex_lock(&inode->i_mutex);
976 975
977 /* We can write back this queue in page reclaim */ 976 /* We can write back this queue in page reclaim */
@@ -1030,7 +1029,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
1030out: 1029out:
1031 current->backing_dev_info = NULL; 1030 current->backing_dev_info = NULL;
1032 mutex_unlock(&inode->i_mutex); 1031 mutex_unlock(&inode->i_mutex);
1033 sb_end_write(inode->i_sb);
1034 1032
1035 return written ? written : err; 1033 return written ? written : err;
1036} 1034}
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 9f9dbeceeee7..3027f4dbbab5 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -131,6 +131,24 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping,
131 return ret; 131 return ret;
132} 132}
133 133
134static int hpfs_write_end(struct file *file, struct address_space *mapping,
135 loff_t pos, unsigned len, unsigned copied,
136 struct page *pagep, void *fsdata)
137{
138 struct inode *inode = mapping->host;
139 int err;
140 err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata);
141 if (err < len)
142 hpfs_write_failed(mapping, pos + len);
143 if (!(err < 0)) {
144 /* make sure we write it on close, if not earlier */
145 hpfs_lock(inode->i_sb);
146 hpfs_i(inode)->i_dirty = 1;
147 hpfs_unlock(inode->i_sb);
148 }
149 return err;
150}
151
134static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) 152static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block)
135{ 153{
136 return generic_block_bmap(mapping,block,hpfs_get_block); 154 return generic_block_bmap(mapping,block,hpfs_get_block);
@@ -140,30 +158,16 @@ const struct address_space_operations hpfs_aops = {
140 .readpage = hpfs_readpage, 158 .readpage = hpfs_readpage,
141 .writepage = hpfs_writepage, 159 .writepage = hpfs_writepage,
142 .write_begin = hpfs_write_begin, 160 .write_begin = hpfs_write_begin,
143 .write_end = generic_write_end, 161 .write_end = hpfs_write_end,
144 .bmap = _hpfs_bmap 162 .bmap = _hpfs_bmap
145}; 163};
146 164
147static ssize_t hpfs_file_write(struct file *file, const char __user *buf,
148 size_t count, loff_t *ppos)
149{
150 ssize_t retval;
151
152 retval = do_sync_write(file, buf, count, ppos);
153 if (retval > 0) {
154 hpfs_lock(file->f_path.dentry->d_sb);
155 hpfs_i(file_inode(file))->i_dirty = 1;
156 hpfs_unlock(file->f_path.dentry->d_sb);
157 }
158 return retval;
159}
160
161const struct file_operations hpfs_file_ops = 165const struct file_operations hpfs_file_ops =
162{ 166{
163 .llseek = generic_file_llseek, 167 .llseek = generic_file_llseek,
164 .read = do_sync_read, 168 .read = do_sync_read,
165 .aio_read = generic_file_aio_read, 169 .aio_read = generic_file_aio_read,
166 .write = hpfs_file_write, 170 .write = do_sync_write,
167 .aio_write = generic_file_aio_write, 171 .aio_write = generic_file_aio_write,
168 .mmap = generic_file_mmap, 172 .mmap = generic_file_mmap,
169 .release = hpfs_file_release, 173 .release = hpfs_file_release,
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 126d3c2e2dee..cd3e38972c86 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -436,7 +436,6 @@ static int hppfs_open(struct inode *inode, struct file *file)
436 path.mnt = inode->i_sb->s_fs_info; 436 path.mnt = inode->i_sb->s_fs_info;
437 path.dentry = HPPFS_I(inode)->proc_dentry; 437 path.dentry = HPPFS_I(inode)->proc_dentry;
438 438
439 /* XXX This isn't closed anywhere */
440 data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); 439 data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred);
441 err = PTR_ERR(data->proc_file); 440 err = PTR_ERR(data->proc_file);
442 if (IS_ERR(data->proc_file)) 441 if (IS_ERR(data->proc_file))
@@ -523,12 +522,23 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
523 return default_llseek(file, off, where); 522 return default_llseek(file, off, where);
524} 523}
525 524
525static int hppfs_release(struct inode *inode, struct file *file)
526{
527 struct hppfs_private *data = file->private_data;
528 struct file *proc_file = data->proc_file;
529 if (proc_file)
530 fput(proc_file);
531 kfree(data);
532 return 0;
533}
534
526static const struct file_operations hppfs_file_fops = { 535static const struct file_operations hppfs_file_fops = {
527 .owner = NULL, 536 .owner = NULL,
528 .llseek = hppfs_llseek, 537 .llseek = hppfs_llseek,
529 .read = hppfs_read, 538 .read = hppfs_read,
530 .write = hppfs_write, 539 .write = hppfs_write,
531 .open = hppfs_open, 540 .open = hppfs_open,
541 .release = hppfs_release,
532}; 542};
533 543
534struct hppfs_dirent { 544struct hppfs_dirent {
@@ -570,18 +580,12 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
570 return err; 580 return err;
571} 581}
572 582
573static int hppfs_fsync(struct file *file, loff_t start, loff_t end,
574 int datasync)
575{
576 return filemap_write_and_wait_range(file->f_mapping, start, end);
577}
578
579static const struct file_operations hppfs_dir_fops = { 583static const struct file_operations hppfs_dir_fops = {
580 .owner = NULL, 584 .owner = NULL,
581 .readdir = hppfs_readdir, 585 .readdir = hppfs_readdir,
582 .open = hppfs_dir_open, 586 .open = hppfs_dir_open,
583 .fsync = hppfs_fsync,
584 .llseek = default_llseek, 587 .llseek = default_llseek,
588 .release = hppfs_release,
585}; 589};
586 590
587static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) 591static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf)
diff --git a/fs/inode.c b/fs/inode.c
index a898b3d43ccf..00d5fc3b86e1 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1803,7 +1803,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev)
1803 inode->i_fop = &def_blk_fops; 1803 inode->i_fop = &def_blk_fops;
1804 inode->i_rdev = rdev; 1804 inode->i_rdev = rdev;
1805 } else if (S_ISFIFO(mode)) 1805 } else if (S_ISFIFO(mode))
1806 inode->i_fop = &def_fifo_fops; 1806 inode->i_fop = &pipefifo_fops;
1807 else if (S_ISSOCK(mode)) 1807 else if (S_ISSOCK(mode))
1808 inode->i_fop = &bad_sock_fops; 1808 inode->i_fop = &bad_sock_fops;
1809 else 1809 else
diff --git a/fs/internal.h b/fs/internal.h
index 4be78237d896..eaa75f75b625 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -130,3 +130,8 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
130 * read_write.c 130 * read_write.c
131 */ 131 */
132extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); 132extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *);
133
134/*
135 * pipe.c
136 */
137extern const struct file_operations pipefifo_fops;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f6c5ba027f4f..95457576e434 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -979,7 +979,7 @@ static const struct seq_operations jbd2_seq_info_ops = {
979 979
980static int jbd2_seq_info_open(struct inode *inode, struct file *file) 980static int jbd2_seq_info_open(struct inode *inode, struct file *file)
981{ 981{
982 journal_t *journal = PDE(inode)->data; 982 journal_t *journal = PDE_DATA(inode);
983 struct jbd2_stats_proc_session *s; 983 struct jbd2_stats_proc_session *s;
984 int rc, size; 984 int rc, size;
985 985
diff --git a/fs/mount.h b/fs/mount.h
index cd5007980400..64a858143ff9 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -18,6 +18,12 @@ struct mnt_pcp {
18 int mnt_writers; 18 int mnt_writers;
19}; 19};
20 20
21struct mountpoint {
22 struct list_head m_hash;
23 struct dentry *m_dentry;
24 int m_count;
25};
26
21struct mount { 27struct mount {
22 struct list_head mnt_hash; 28 struct list_head mnt_hash;
23 struct mount *mnt_parent; 29 struct mount *mnt_parent;
@@ -40,6 +46,7 @@ struct mount {
40 struct list_head mnt_slave; /* slave list entry */ 46 struct list_head mnt_slave; /* slave list entry */
41 struct mount *mnt_master; /* slave is on master->mnt_slave_list */ 47 struct mount *mnt_master; /* slave is on master->mnt_slave_list */
42 struct mnt_namespace *mnt_ns; /* containing namespace */ 48 struct mnt_namespace *mnt_ns; /* containing namespace */
49 struct mountpoint *mnt_mp; /* where is it mounted */
43#ifdef CONFIG_FSNOTIFY 50#ifdef CONFIG_FSNOTIFY
44 struct hlist_head mnt_fsnotify_marks; 51 struct hlist_head mnt_fsnotify_marks;
45 __u32 mnt_fsnotify_mask; 52 __u32 mnt_fsnotify_mask;
diff --git a/fs/namespace.c b/fs/namespace.c
index 341d3f564082..b4f96a5230a3 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -21,7 +21,8 @@
21#include <linux/fs_struct.h> /* get_fs_root et.al. */ 21#include <linux/fs_struct.h> /* get_fs_root et.al. */
22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 22#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
23#include <linux/uaccess.h> 23#include <linux/uaccess.h>
24#include <linux/proc_fs.h> 24#include <linux/proc_ns.h>
25#include <linux/magic.h>
25#include "pnode.h" 26#include "pnode.h"
26#include "internal.h" 27#include "internal.h"
27 28
@@ -36,6 +37,7 @@ static int mnt_id_start = 0;
36static int mnt_group_start = 1; 37static int mnt_group_start = 1;
37 38
38static struct list_head *mount_hashtable __read_mostly; 39static struct list_head *mount_hashtable __read_mostly;
40static struct list_head *mountpoint_hashtable __read_mostly;
39static struct kmem_cache *mnt_cache __read_mostly; 41static struct kmem_cache *mnt_cache __read_mostly;
40static struct rw_semaphore namespace_sem; 42static struct rw_semaphore namespace_sem;
41 43
@@ -605,6 +607,51 @@ struct vfsmount *lookup_mnt(struct path *path)
605 } 607 }
606} 608}
607 609
610static struct mountpoint *new_mountpoint(struct dentry *dentry)
611{
612 struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry);
613 struct mountpoint *mp;
614
615 list_for_each_entry(mp, chain, m_hash) {
616 if (mp->m_dentry == dentry) {
617 /* might be worth a WARN_ON() */
618 if (d_unlinked(dentry))
619 return ERR_PTR(-ENOENT);
620 mp->m_count++;
621 return mp;
622 }
623 }
624
625 mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
626 if (!mp)
627 return ERR_PTR(-ENOMEM);
628
629 spin_lock(&dentry->d_lock);
630 if (d_unlinked(dentry)) {
631 spin_unlock(&dentry->d_lock);
632 kfree(mp);
633 return ERR_PTR(-ENOENT);
634 }
635 dentry->d_flags |= DCACHE_MOUNTED;
636 spin_unlock(&dentry->d_lock);
637 mp->m_dentry = dentry;
638 mp->m_count = 1;
639 list_add(&mp->m_hash, chain);
640 return mp;
641}
642
643static void put_mountpoint(struct mountpoint *mp)
644{
645 if (!--mp->m_count) {
646 struct dentry *dentry = mp->m_dentry;
647 spin_lock(&dentry->d_lock);
648 dentry->d_flags &= ~DCACHE_MOUNTED;
649 spin_unlock(&dentry->d_lock);
650 list_del(&mp->m_hash);
651 kfree(mp);
652 }
653}
654
608static inline int check_mnt(struct mount *mnt) 655static inline int check_mnt(struct mount *mnt)
609{ 656{
610 return mnt->mnt_ns == current->nsproxy->mnt_ns; 657 return mnt->mnt_ns == current->nsproxy->mnt_ns;
@@ -633,27 +680,6 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
633} 680}
634 681
635/* 682/*
636 * Clear dentry's mounted state if it has no remaining mounts.
637 * vfsmount_lock must be held for write.
638 */
639static void dentry_reset_mounted(struct dentry *dentry)
640{
641 unsigned u;
642
643 for (u = 0; u < HASH_SIZE; u++) {
644 struct mount *p;
645
646 list_for_each_entry(p, &mount_hashtable[u], mnt_hash) {
647 if (p->mnt_mountpoint == dentry)
648 return;
649 }
650 }
651 spin_lock(&dentry->d_lock);
652 dentry->d_flags &= ~DCACHE_MOUNTED;
653 spin_unlock(&dentry->d_lock);
654}
655
656/*
657 * vfsmount lock must be held for write 683 * vfsmount lock must be held for write
658 */ 684 */
659static void detach_mnt(struct mount *mnt, struct path *old_path) 685static void detach_mnt(struct mount *mnt, struct path *old_path)
@@ -664,32 +690,35 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
664 mnt->mnt_mountpoint = mnt->mnt.mnt_root; 690 mnt->mnt_mountpoint = mnt->mnt.mnt_root;
665 list_del_init(&mnt->mnt_child); 691 list_del_init(&mnt->mnt_child);
666 list_del_init(&mnt->mnt_hash); 692 list_del_init(&mnt->mnt_hash);
667 dentry_reset_mounted(old_path->dentry); 693 put_mountpoint(mnt->mnt_mp);
694 mnt->mnt_mp = NULL;
668} 695}
669 696
670/* 697/*
671 * vfsmount lock must be held for write 698 * vfsmount lock must be held for write
672 */ 699 */
673void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry, 700void mnt_set_mountpoint(struct mount *mnt,
701 struct mountpoint *mp,
674 struct mount *child_mnt) 702 struct mount *child_mnt)
675{ 703{
704 mp->m_count++;
676 mnt_add_count(mnt, 1); /* essentially, that's mntget */ 705 mnt_add_count(mnt, 1); /* essentially, that's mntget */
677 child_mnt->mnt_mountpoint = dget(dentry); 706 child_mnt->mnt_mountpoint = dget(mp->m_dentry);
678 child_mnt->mnt_parent = mnt; 707 child_mnt->mnt_parent = mnt;
679 spin_lock(&dentry->d_lock); 708 child_mnt->mnt_mp = mp;
680 dentry->d_flags |= DCACHE_MOUNTED;
681 spin_unlock(&dentry->d_lock);
682} 709}
683 710
684/* 711/*
685 * vfsmount lock must be held for write 712 * vfsmount lock must be held for write
686 */ 713 */
687static void attach_mnt(struct mount *mnt, struct path *path) 714static void attach_mnt(struct mount *mnt,
715 struct mount *parent,
716 struct mountpoint *mp)
688{ 717{
689 mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt); 718 mnt_set_mountpoint(parent, mp, mnt);
690 list_add_tail(&mnt->mnt_hash, mount_hashtable + 719 list_add_tail(&mnt->mnt_hash, mount_hashtable +
691 hash(path->mnt, path->dentry)); 720 hash(&parent->mnt, mp->m_dentry));
692 list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts); 721 list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
693} 722}
694 723
695/* 724/*
@@ -1095,11 +1124,23 @@ int may_umount(struct vfsmount *mnt)
1095 1124
1096EXPORT_SYMBOL(may_umount); 1125EXPORT_SYMBOL(may_umount);
1097 1126
1098void release_mounts(struct list_head *head) 1127static LIST_HEAD(unmounted); /* protected by namespace_sem */
1128
1129static void namespace_unlock(void)
1099{ 1130{
1100 struct mount *mnt; 1131 struct mount *mnt;
1101 while (!list_empty(head)) { 1132 LIST_HEAD(head);
1102 mnt = list_first_entry(head, struct mount, mnt_hash); 1133
1134 if (likely(list_empty(&unmounted))) {
1135 up_write(&namespace_sem);
1136 return;
1137 }
1138
1139 list_splice_init(&unmounted, &head);
1140 up_write(&namespace_sem);
1141
1142 while (!list_empty(&head)) {
1143 mnt = list_first_entry(&head, struct mount, mnt_hash);
1103 list_del_init(&mnt->mnt_hash); 1144 list_del_init(&mnt->mnt_hash);
1104 if (mnt_has_parent(mnt)) { 1145 if (mnt_has_parent(mnt)) {
1105 struct dentry *dentry; 1146 struct dentry *dentry;
@@ -1119,11 +1160,16 @@ void release_mounts(struct list_head *head)
1119 } 1160 }
1120} 1161}
1121 1162
1163static inline void namespace_lock(void)
1164{
1165 down_write(&namespace_sem);
1166}
1167
1122/* 1168/*
1123 * vfsmount lock must be held for write 1169 * vfsmount lock must be held for write
1124 * namespace_sem must be held for write 1170 * namespace_sem must be held for write
1125 */ 1171 */
1126void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) 1172void umount_tree(struct mount *mnt, int propagate)
1127{ 1173{
1128 LIST_HEAD(tmp_list); 1174 LIST_HEAD(tmp_list);
1129 struct mount *p; 1175 struct mount *p;
@@ -1142,20 +1188,20 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill)
1142 list_del_init(&p->mnt_child); 1188 list_del_init(&p->mnt_child);
1143 if (mnt_has_parent(p)) { 1189 if (mnt_has_parent(p)) {
1144 p->mnt_parent->mnt_ghosts++; 1190 p->mnt_parent->mnt_ghosts++;
1145 dentry_reset_mounted(p->mnt_mountpoint); 1191 put_mountpoint(p->mnt_mp);
1192 p->mnt_mp = NULL;
1146 } 1193 }
1147 change_mnt_propagation(p, MS_PRIVATE); 1194 change_mnt_propagation(p, MS_PRIVATE);
1148 } 1195 }
1149 list_splice(&tmp_list, kill); 1196 list_splice(&tmp_list, &unmounted);
1150} 1197}
1151 1198
1152static void shrink_submounts(struct mount *mnt, struct list_head *umounts); 1199static void shrink_submounts(struct mount *mnt);
1153 1200
1154static int do_umount(struct mount *mnt, int flags) 1201static int do_umount(struct mount *mnt, int flags)
1155{ 1202{
1156 struct super_block *sb = mnt->mnt.mnt_sb; 1203 struct super_block *sb = mnt->mnt.mnt_sb;
1157 int retval; 1204 int retval;
1158 LIST_HEAD(umount_list);
1159 1205
1160 retval = security_sb_umount(&mnt->mnt, flags); 1206 retval = security_sb_umount(&mnt->mnt, flags);
1161 if (retval) 1207 if (retval)
@@ -1222,22 +1268,21 @@ static int do_umount(struct mount *mnt, int flags)
1222 return retval; 1268 return retval;
1223 } 1269 }
1224 1270
1225 down_write(&namespace_sem); 1271 namespace_lock();
1226 br_write_lock(&vfsmount_lock); 1272 br_write_lock(&vfsmount_lock);
1227 event++; 1273 event++;
1228 1274
1229 if (!(flags & MNT_DETACH)) 1275 if (!(flags & MNT_DETACH))
1230 shrink_submounts(mnt, &umount_list); 1276 shrink_submounts(mnt);
1231 1277
1232 retval = -EBUSY; 1278 retval = -EBUSY;
1233 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { 1279 if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
1234 if (!list_empty(&mnt->mnt_list)) 1280 if (!list_empty(&mnt->mnt_list))
1235 umount_tree(mnt, 1, &umount_list); 1281 umount_tree(mnt, 1);
1236 retval = 0; 1282 retval = 0;
1237 } 1283 }
1238 br_write_unlock(&vfsmount_lock); 1284 br_write_unlock(&vfsmount_lock);
1239 up_write(&namespace_sem); 1285 namespace_unlock();
1240 release_mounts(&umount_list);
1241 return retval; 1286 return retval;
1242} 1287}
1243 1288
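For reference, the change above makes umount_tree() queue its victims on the file-wide `unmounted` list (protected by namespace_sem) and lets namespace_unlock() splice them off and tear them down only after the lock is dropped, replacing the old per-caller umount_list/release_mounts() pair. A rough userspace analogue of that "collect under the lock, free after unlocking" pattern, with purely illustrative names and no claim to match the kernel implementation:

/* Userspace sketch only: pthread mutex stands in for namespace_sem. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct victim {
    struct victim *next;
    int id;
};

static pthread_mutex_t ns_lock = PTHREAD_MUTEX_INITIALIZER;
static struct victim *unmounted;            /* protected by ns_lock */

static void ns_lock_acquire(void) { pthread_mutex_lock(&ns_lock); }

/* Analogue of namespace_unlock(): detach the pending list while still
 * holding the lock, drop the lock, then do the expensive teardown. */
static void ns_lock_release(void)
{
    struct victim *head = unmounted;

    unmounted = NULL;
    pthread_mutex_unlock(&ns_lock);

    while (head) {
        struct victim *v = head;
        head = v->next;
        printf("freeing victim %d outside the lock\n", v->id);
        free(v);
    }
}

/* Analogue of umount_tree(): callers only queue work and no longer carry a
 * private list around. Must be called with ns_lock held. */
static void queue_victim(int id)
{
    struct victim *v = malloc(sizeof(*v));

    if (!v)
        return;
    v->id = id;
    v->next = unmounted;
    unmounted = v;
}

int main(void)
{
    ns_lock_acquire();
    queue_victim(1);
    queue_victim(2);
    ns_lock_release();    /* victims are freed here, lock already dropped */
    return 0;
}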
@@ -1310,13 +1355,13 @@ static bool mnt_ns_loop(struct path *path)
1310 * mount namespace loop? 1355 * mount namespace loop?
1311 */ 1356 */
1312 struct inode *inode = path->dentry->d_inode; 1357 struct inode *inode = path->dentry->d_inode;
1313 struct proc_inode *ei; 1358 struct proc_ns *ei;
1314 struct mnt_namespace *mnt_ns; 1359 struct mnt_namespace *mnt_ns;
1315 1360
1316 if (!proc_ns_inode(inode)) 1361 if (!proc_ns_inode(inode))
1317 return false; 1362 return false;
1318 1363
1319 ei = PROC_I(inode); 1364 ei = get_proc_ns(inode);
1320 if (ei->ns_ops != &mntns_operations) 1365 if (ei->ns_ops != &mntns_operations)
1321 return false; 1366 return false;
1322 1367
@@ -1327,8 +1372,7 @@ static bool mnt_ns_loop(struct path *path)
1327struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, 1372struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1328 int flag) 1373 int flag)
1329{ 1374{
1330 struct mount *res, *p, *q, *r; 1375 struct mount *res, *p, *q, *r, *parent;
1331 struct path path;
1332 1376
1333 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) 1377 if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt))
1334 return ERR_PTR(-EINVAL); 1378 return ERR_PTR(-EINVAL);
@@ -1355,25 +1399,22 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
1355 q = q->mnt_parent; 1399 q = q->mnt_parent;
1356 } 1400 }
1357 p = s; 1401 p = s;
1358 path.mnt = &q->mnt; 1402 parent = q;
1359 path.dentry = p->mnt_mountpoint;
1360 q = clone_mnt(p, p->mnt.mnt_root, flag); 1403 q = clone_mnt(p, p->mnt.mnt_root, flag);
1361 if (IS_ERR(q)) 1404 if (IS_ERR(q))
1362 goto out; 1405 goto out;
1363 br_write_lock(&vfsmount_lock); 1406 br_write_lock(&vfsmount_lock);
1364 list_add_tail(&q->mnt_list, &res->mnt_list); 1407 list_add_tail(&q->mnt_list, &res->mnt_list);
1365 attach_mnt(q, &path); 1408 attach_mnt(q, parent, p->mnt_mp);
1366 br_write_unlock(&vfsmount_lock); 1409 br_write_unlock(&vfsmount_lock);
1367 } 1410 }
1368 } 1411 }
1369 return res; 1412 return res;
1370out: 1413out:
1371 if (res) { 1414 if (res) {
1372 LIST_HEAD(umount_list);
1373 br_write_lock(&vfsmount_lock); 1415 br_write_lock(&vfsmount_lock);
1374 umount_tree(res, 0, &umount_list); 1416 umount_tree(res, 0);
1375 br_write_unlock(&vfsmount_lock); 1417 br_write_unlock(&vfsmount_lock);
1376 release_mounts(&umount_list);
1377 } 1418 }
1378 return q; 1419 return q;
1379} 1420}
@@ -1383,10 +1424,10 @@ out:
1383struct vfsmount *collect_mounts(struct path *path) 1424struct vfsmount *collect_mounts(struct path *path)
1384{ 1425{
1385 struct mount *tree; 1426 struct mount *tree;
1386 down_write(&namespace_sem); 1427 namespace_lock();
1387 tree = copy_tree(real_mount(path->mnt), path->dentry, 1428 tree = copy_tree(real_mount(path->mnt), path->dentry,
1388 CL_COPY_ALL | CL_PRIVATE); 1429 CL_COPY_ALL | CL_PRIVATE);
1389 up_write(&namespace_sem); 1430 namespace_unlock();
1390 if (IS_ERR(tree)) 1431 if (IS_ERR(tree))
1391 return NULL; 1432 return NULL;
1392 return &tree->mnt; 1433 return &tree->mnt;
@@ -1394,13 +1435,11 @@ struct vfsmount *collect_mounts(struct path *path)
1394 1435
1395void drop_collected_mounts(struct vfsmount *mnt) 1436void drop_collected_mounts(struct vfsmount *mnt)
1396{ 1437{
1397 LIST_HEAD(umount_list); 1438 namespace_lock();
1398 down_write(&namespace_sem);
1399 br_write_lock(&vfsmount_lock); 1439 br_write_lock(&vfsmount_lock);
1400 umount_tree(real_mount(mnt), 0, &umount_list); 1440 umount_tree(real_mount(mnt), 0);
1401 br_write_unlock(&vfsmount_lock); 1441 br_write_unlock(&vfsmount_lock);
1402 up_write(&namespace_sem); 1442 namespace_unlock();
1403 release_mounts(&umount_list);
1404} 1443}
1405 1444
1406int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, 1445int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
@@ -1509,11 +1548,11 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
1509 * in allocations. 1548 * in allocations.
1510 */ 1549 */
1511static int attach_recursive_mnt(struct mount *source_mnt, 1550static int attach_recursive_mnt(struct mount *source_mnt,
1512 struct path *path, struct path *parent_path) 1551 struct mount *dest_mnt,
1552 struct mountpoint *dest_mp,
1553 struct path *parent_path)
1513{ 1554{
1514 LIST_HEAD(tree_list); 1555 LIST_HEAD(tree_list);
1515 struct mount *dest_mnt = real_mount(path->mnt);
1516 struct dentry *dest_dentry = path->dentry;
1517 struct mount *child, *p; 1556 struct mount *child, *p;
1518 int err; 1557 int err;
1519 1558
@@ -1522,7 +1561,7 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1522 if (err) 1561 if (err)
1523 goto out; 1562 goto out;
1524 } 1563 }
1525 err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list); 1564 err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
1526 if (err) 1565 if (err)
1527 goto out_cleanup_ids; 1566 goto out_cleanup_ids;
1528 1567
@@ -1534,10 +1573,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1534 } 1573 }
1535 if (parent_path) { 1574 if (parent_path) {
1536 detach_mnt(source_mnt, parent_path); 1575 detach_mnt(source_mnt, parent_path);
1537 attach_mnt(source_mnt, path); 1576 attach_mnt(source_mnt, dest_mnt, dest_mp);
1538 touch_mnt_namespace(source_mnt->mnt_ns); 1577 touch_mnt_namespace(source_mnt->mnt_ns);
1539 } else { 1578 } else {
1540 mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); 1579 mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
1541 commit_tree(source_mnt); 1580 commit_tree(source_mnt);
1542 } 1581 }
1543 1582
@@ -1556,46 +1595,53 @@ static int attach_recursive_mnt(struct mount *source_mnt,
1556 return err; 1595 return err;
1557} 1596}
1558 1597
1559static int lock_mount(struct path *path) 1598static struct mountpoint *lock_mount(struct path *path)
1560{ 1599{
1561 struct vfsmount *mnt; 1600 struct vfsmount *mnt;
1601 struct dentry *dentry = path->dentry;
1562retry: 1602retry:
1563 mutex_lock(&path->dentry->d_inode->i_mutex); 1603 mutex_lock(&dentry->d_inode->i_mutex);
1564 if (unlikely(cant_mount(path->dentry))) { 1604 if (unlikely(cant_mount(dentry))) {
1565 mutex_unlock(&path->dentry->d_inode->i_mutex); 1605 mutex_unlock(&dentry->d_inode->i_mutex);
1566 return -ENOENT; 1606 return ERR_PTR(-ENOENT);
1567 } 1607 }
1568 down_write(&namespace_sem); 1608 namespace_lock();
1569 mnt = lookup_mnt(path); 1609 mnt = lookup_mnt(path);
1570 if (likely(!mnt)) 1610 if (likely(!mnt)) {
1571 return 0; 1611 struct mountpoint *mp = new_mountpoint(dentry);
1572 up_write(&namespace_sem); 1612 if (IS_ERR(mp)) {
1613 namespace_unlock();
1614 mutex_unlock(&dentry->d_inode->i_mutex);
1615 return mp;
1616 }
1617 return mp;
1618 }
1619 namespace_unlock();
1573 mutex_unlock(&path->dentry->d_inode->i_mutex); 1620 mutex_unlock(&path->dentry->d_inode->i_mutex);
1574 path_put(path); 1621 path_put(path);
1575 path->mnt = mnt; 1622 path->mnt = mnt;
1576 path->dentry = dget(mnt->mnt_root); 1623 dentry = path->dentry = dget(mnt->mnt_root);
1577 goto retry; 1624 goto retry;
1578} 1625}
1579 1626
1580static void unlock_mount(struct path *path) 1627static void unlock_mount(struct mountpoint *where)
1581{ 1628{
1582 up_write(&namespace_sem); 1629 struct dentry *dentry = where->m_dentry;
1583 mutex_unlock(&path->dentry->d_inode->i_mutex); 1630 put_mountpoint(where);
1631 namespace_unlock();
1632 mutex_unlock(&dentry->d_inode->i_mutex);
1584} 1633}
1585 1634
1586static int graft_tree(struct mount *mnt, struct path *path) 1635static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
1587{ 1636{
1588 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) 1637 if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER)
1589 return -EINVAL; 1638 return -EINVAL;
1590 1639
1591 if (S_ISDIR(path->dentry->d_inode->i_mode) != 1640 if (S_ISDIR(mp->m_dentry->d_inode->i_mode) !=
1592 S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode)) 1641 S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode))
1593 return -ENOTDIR; 1642 return -ENOTDIR;
1594 1643
1595 if (d_unlinked(path->dentry)) 1644 return attach_recursive_mnt(mnt, p, mp, NULL);
1596 return -ENOENT;
1597
1598 return attach_recursive_mnt(mnt, path, NULL);
1599} 1645}
1600 1646
1601/* 1647/*
@@ -1633,7 +1679,7 @@ static int do_change_type(struct path *path, int flag)
1633 if (!type) 1679 if (!type)
1634 return -EINVAL; 1680 return -EINVAL;
1635 1681
1636 down_write(&namespace_sem); 1682 namespace_lock();
1637 if (type == MS_SHARED) { 1683 if (type == MS_SHARED) {
1638 err = invent_group_ids(mnt, recurse); 1684 err = invent_group_ids(mnt, recurse);
1639 if (err) 1685 if (err)
@@ -1646,7 +1692,7 @@ static int do_change_type(struct path *path, int flag)
1646 br_write_unlock(&vfsmount_lock); 1692 br_write_unlock(&vfsmount_lock);
1647 1693
1648 out_unlock: 1694 out_unlock:
1649 up_write(&namespace_sem); 1695 namespace_unlock();
1650 return err; 1696 return err;
1651} 1697}
1652 1698
@@ -1656,9 +1702,9 @@ static int do_change_type(struct path *path, int flag)
1656static int do_loopback(struct path *path, const char *old_name, 1702static int do_loopback(struct path *path, const char *old_name,
1657 int recurse) 1703 int recurse)
1658{ 1704{
1659 LIST_HEAD(umount_list);
1660 struct path old_path; 1705 struct path old_path;
1661 struct mount *mnt = NULL, *old; 1706 struct mount *mnt = NULL, *old, *parent;
1707 struct mountpoint *mp;
1662 int err; 1708 int err;
1663 if (!old_name || !*old_name) 1709 if (!old_name || !*old_name)
1664 return -EINVAL; 1710 return -EINVAL;
@@ -1670,17 +1716,19 @@ static int do_loopback(struct path *path, const char *old_name,
1670 if (mnt_ns_loop(&old_path)) 1716 if (mnt_ns_loop(&old_path))
1671 goto out; 1717 goto out;
1672 1718
1673 err = lock_mount(path); 1719 mp = lock_mount(path);
1674 if (err) 1720 err = PTR_ERR(mp);
1721 if (IS_ERR(mp))
1675 goto out; 1722 goto out;
1676 1723
1677 old = real_mount(old_path.mnt); 1724 old = real_mount(old_path.mnt);
1725 parent = real_mount(path->mnt);
1678 1726
1679 err = -EINVAL; 1727 err = -EINVAL;
1680 if (IS_MNT_UNBINDABLE(old)) 1728 if (IS_MNT_UNBINDABLE(old))
1681 goto out2; 1729 goto out2;
1682 1730
1683 if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old)) 1731 if (!check_mnt(parent) || !check_mnt(old))
1684 goto out2; 1732 goto out2;
1685 1733
1686 if (recurse) 1734 if (recurse)
@@ -1693,15 +1741,14 @@ static int do_loopback(struct path *path, const char *old_name,
1693 goto out2; 1741 goto out2;
1694 } 1742 }
1695 1743
1696 err = graft_tree(mnt, path); 1744 err = graft_tree(mnt, parent, mp);
1697 if (err) { 1745 if (err) {
1698 br_write_lock(&vfsmount_lock); 1746 br_write_lock(&vfsmount_lock);
1699 umount_tree(mnt, 0, &umount_list); 1747 umount_tree(mnt, 0);
1700 br_write_unlock(&vfsmount_lock); 1748 br_write_unlock(&vfsmount_lock);
1701 } 1749 }
1702out2: 1750out2:
1703 unlock_mount(path); 1751 unlock_mount(mp);
1704 release_mounts(&umount_list);
1705out: 1752out:
1706 path_put(&old_path); 1753 path_put(&old_path);
1707 return err; 1754 return err;
@@ -1786,6 +1833,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1786 struct path old_path, parent_path; 1833 struct path old_path, parent_path;
1787 struct mount *p; 1834 struct mount *p;
1788 struct mount *old; 1835 struct mount *old;
1836 struct mountpoint *mp;
1789 int err; 1837 int err;
1790 if (!old_name || !*old_name) 1838 if (!old_name || !*old_name)
1791 return -EINVAL; 1839 return -EINVAL;
@@ -1793,8 +1841,9 @@ static int do_move_mount(struct path *path, const char *old_name)
1793 if (err) 1841 if (err)
1794 return err; 1842 return err;
1795 1843
1796 err = lock_mount(path); 1844 mp = lock_mount(path);
1797 if (err < 0) 1845 err = PTR_ERR(mp);
1846 if (IS_ERR(mp))
1798 goto out; 1847 goto out;
1799 1848
1800 old = real_mount(old_path.mnt); 1849 old = real_mount(old_path.mnt);
@@ -1804,9 +1853,6 @@ static int do_move_mount(struct path *path, const char *old_name)
1804 if (!check_mnt(p) || !check_mnt(old)) 1853 if (!check_mnt(p) || !check_mnt(old))
1805 goto out1; 1854 goto out1;
1806 1855
1807 if (d_unlinked(path->dentry))
1808 goto out1;
1809
1810 err = -EINVAL; 1856 err = -EINVAL;
1811 if (old_path.dentry != old_path.mnt->mnt_root) 1857 if (old_path.dentry != old_path.mnt->mnt_root)
1812 goto out1; 1858 goto out1;
@@ -1833,7 +1879,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1833 if (p == old) 1879 if (p == old)
1834 goto out1; 1880 goto out1;
1835 1881
1836 err = attach_recursive_mnt(old, path, &parent_path); 1882 err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path);
1837 if (err) 1883 if (err)
1838 goto out1; 1884 goto out1;
1839 1885
@@ -1841,7 +1887,7 @@ static int do_move_mount(struct path *path, const char *old_name)
1841 * automatically */ 1887 * automatically */
1842 list_del_init(&old->mnt_expire); 1888 list_del_init(&old->mnt_expire);
1843out1: 1889out1:
1844 unlock_mount(path); 1890 unlock_mount(mp);
1845out: 1891out:
1846 if (!err) 1892 if (!err)
1847 path_put(&parent_path); 1893 path_put(&parent_path);
@@ -1877,21 +1923,24 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype)
1877 */ 1923 */
1878static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) 1924static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1879{ 1925{
1926 struct mountpoint *mp;
1927 struct mount *parent;
1880 int err; 1928 int err;
1881 1929
1882 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); 1930 mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
1883 1931
1884 err = lock_mount(path); 1932 mp = lock_mount(path);
1885 if (err) 1933 if (IS_ERR(mp))
1886 return err; 1934 return PTR_ERR(mp);
1887 1935
1936 parent = real_mount(path->mnt);
1888 err = -EINVAL; 1937 err = -EINVAL;
1889 if (unlikely(!check_mnt(real_mount(path->mnt)))) { 1938 if (unlikely(!check_mnt(parent))) {
1890 /* that's acceptable only for automounts done in private ns */ 1939 /* that's acceptable only for automounts done in private ns */
1891 if (!(mnt_flags & MNT_SHRINKABLE)) 1940 if (!(mnt_flags & MNT_SHRINKABLE))
1892 goto unlock; 1941 goto unlock;
1893 /* ... and for those we'd better have mountpoint still alive */ 1942 /* ... and for those we'd better have mountpoint still alive */
1894 if (!real_mount(path->mnt)->mnt_ns) 1943 if (!parent->mnt_ns)
1895 goto unlock; 1944 goto unlock;
1896 } 1945 }
1897 1946
@@ -1906,10 +1955,10 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
1906 goto unlock; 1955 goto unlock;
1907 1956
1908 newmnt->mnt.mnt_flags = mnt_flags; 1957 newmnt->mnt.mnt_flags = mnt_flags;
1909 err = graft_tree(newmnt, path); 1958 err = graft_tree(newmnt, parent, mp);
1910 1959
1911unlock: 1960unlock:
1912 unlock_mount(path); 1961 unlock_mount(mp);
1913 return err; 1962 return err;
1914} 1963}
1915 1964
@@ -1982,11 +2031,11 @@ int finish_automount(struct vfsmount *m, struct path *path)
1982fail: 2031fail:
1983 /* remove m from any expiration list it may be on */ 2032 /* remove m from any expiration list it may be on */
1984 if (!list_empty(&mnt->mnt_expire)) { 2033 if (!list_empty(&mnt->mnt_expire)) {
1985 down_write(&namespace_sem); 2034 namespace_lock();
1986 br_write_lock(&vfsmount_lock); 2035 br_write_lock(&vfsmount_lock);
1987 list_del_init(&mnt->mnt_expire); 2036 list_del_init(&mnt->mnt_expire);
1988 br_write_unlock(&vfsmount_lock); 2037 br_write_unlock(&vfsmount_lock);
1989 up_write(&namespace_sem); 2038 namespace_unlock();
1990 } 2039 }
1991 mntput(m); 2040 mntput(m);
1992 mntput(m); 2041 mntput(m);
@@ -2000,13 +2049,13 @@ fail:
2000 */ 2049 */
2001void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) 2050void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
2002{ 2051{
2003 down_write(&namespace_sem); 2052 namespace_lock();
2004 br_write_lock(&vfsmount_lock); 2053 br_write_lock(&vfsmount_lock);
2005 2054
2006 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); 2055 list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
2007 2056
2008 br_write_unlock(&vfsmount_lock); 2057 br_write_unlock(&vfsmount_lock);
2009 up_write(&namespace_sem); 2058 namespace_unlock();
2010} 2059}
2011EXPORT_SYMBOL(mnt_set_expiry); 2060EXPORT_SYMBOL(mnt_set_expiry);
2012 2061
@@ -2019,12 +2068,11 @@ void mark_mounts_for_expiry(struct list_head *mounts)
2019{ 2068{
2020 struct mount *mnt, *next; 2069 struct mount *mnt, *next;
2021 LIST_HEAD(graveyard); 2070 LIST_HEAD(graveyard);
2022 LIST_HEAD(umounts);
2023 2071
2024 if (list_empty(mounts)) 2072 if (list_empty(mounts))
2025 return; 2073 return;
2026 2074
2027 down_write(&namespace_sem); 2075 namespace_lock();
2028 br_write_lock(&vfsmount_lock); 2076 br_write_lock(&vfsmount_lock);
2029 2077
2030 /* extract from the expiration list every vfsmount that matches the 2078 /* extract from the expiration list every vfsmount that matches the
@@ -2042,12 +2090,10 @@ void mark_mounts_for_expiry(struct list_head *mounts)
2042 while (!list_empty(&graveyard)) { 2090 while (!list_empty(&graveyard)) {
2043 mnt = list_first_entry(&graveyard, struct mount, mnt_expire); 2091 mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
2044 touch_mnt_namespace(mnt->mnt_ns); 2092 touch_mnt_namespace(mnt->mnt_ns);
2045 umount_tree(mnt, 1, &umounts); 2093 umount_tree(mnt, 1);
2046 } 2094 }
2047 br_write_unlock(&vfsmount_lock); 2095 br_write_unlock(&vfsmount_lock);
2048 up_write(&namespace_sem); 2096 namespace_unlock();
2049
2050 release_mounts(&umounts);
2051} 2097}
2052 2098
2053EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); 2099EXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
@@ -2104,7 +2150,7 @@ resume:
2104 * 2150 *
2105 * vfsmount_lock must be held for write 2151 * vfsmount_lock must be held for write
2106 */ 2152 */
2107static void shrink_submounts(struct mount *mnt, struct list_head *umounts) 2153static void shrink_submounts(struct mount *mnt)
2108{ 2154{
2109 LIST_HEAD(graveyard); 2155 LIST_HEAD(graveyard);
2110 struct mount *m; 2156 struct mount *m;
@@ -2115,7 +2161,7 @@ static void shrink_submounts(struct mount *mnt, struct list_head *umounts)
2115 m = list_first_entry(&graveyard, struct mount, 2161 m = list_first_entry(&graveyard, struct mount,
2116 mnt_expire); 2162 mnt_expire);
2117 touch_mnt_namespace(m->mnt_ns); 2163 touch_mnt_namespace(m->mnt_ns);
2118 umount_tree(m, 1, umounts); 2164 umount_tree(m, 1);
2119 } 2165 }
2120 } 2166 }
2121} 2167}
@@ -2342,14 +2388,14 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2342 if (IS_ERR(new_ns)) 2388 if (IS_ERR(new_ns))
2343 return new_ns; 2389 return new_ns;
2344 2390
2345 down_write(&namespace_sem); 2391 namespace_lock();
2346 /* First pass: copy the tree topology */ 2392 /* First pass: copy the tree topology */
2347 copy_flags = CL_COPY_ALL | CL_EXPIRE; 2393 copy_flags = CL_COPY_ALL | CL_EXPIRE;
2348 if (user_ns != mnt_ns->user_ns) 2394 if (user_ns != mnt_ns->user_ns)
2349 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; 2395 copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED;
2350 new = copy_tree(old, old->mnt.mnt_root, copy_flags); 2396 new = copy_tree(old, old->mnt.mnt_root, copy_flags);
2351 if (IS_ERR(new)) { 2397 if (IS_ERR(new)) {
2352 up_write(&namespace_sem); 2398 namespace_unlock();
2353 free_mnt_ns(new_ns); 2399 free_mnt_ns(new_ns);
2354 return ERR_CAST(new); 2400 return ERR_CAST(new);
2355 } 2401 }
@@ -2380,7 +2426,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2380 p = next_mnt(p, old); 2426 p = next_mnt(p, old);
2381 q = next_mnt(q, new); 2427 q = next_mnt(q, new);
2382 } 2428 }
2383 up_write(&namespace_sem); 2429 namespace_unlock();
2384 2430
2385 if (rootmnt) 2431 if (rootmnt)
2386 mntput(rootmnt); 2432 mntput(rootmnt);
@@ -2550,7 +2596,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2550 const char __user *, put_old) 2596 const char __user *, put_old)
2551{ 2597{
2552 struct path new, old, parent_path, root_parent, root; 2598 struct path new, old, parent_path, root_parent, root;
2553 struct mount *new_mnt, *root_mnt; 2599 struct mount *new_mnt, *root_mnt, *old_mnt;
2600 struct mountpoint *old_mp, *root_mp;
2554 int error; 2601 int error;
2555 2602
2556 if (!may_mount()) 2603 if (!may_mount())
@@ -2569,14 +2616,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2569 goto out2; 2616 goto out2;
2570 2617
2571 get_fs_root(current->fs, &root); 2618 get_fs_root(current->fs, &root);
2572 error = lock_mount(&old); 2619 old_mp = lock_mount(&old);
2573 if (error) 2620 error = PTR_ERR(old_mp);
2621 if (IS_ERR(old_mp))
2574 goto out3; 2622 goto out3;
2575 2623
2576 error = -EINVAL; 2624 error = -EINVAL;
2577 new_mnt = real_mount(new.mnt); 2625 new_mnt = real_mount(new.mnt);
2578 root_mnt = real_mount(root.mnt); 2626 root_mnt = real_mount(root.mnt);
2579 if (IS_MNT_SHARED(real_mount(old.mnt)) || 2627 old_mnt = real_mount(old.mnt);
2628 if (IS_MNT_SHARED(old_mnt) ||
2580 IS_MNT_SHARED(new_mnt->mnt_parent) || 2629 IS_MNT_SHARED(new_mnt->mnt_parent) ||
2581 IS_MNT_SHARED(root_mnt->mnt_parent)) 2630 IS_MNT_SHARED(root_mnt->mnt_parent))
2582 goto out4; 2631 goto out4;
@@ -2585,37 +2634,37 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2585 error = -ENOENT; 2634 error = -ENOENT;
2586 if (d_unlinked(new.dentry)) 2635 if (d_unlinked(new.dentry))
2587 goto out4; 2636 goto out4;
2588 if (d_unlinked(old.dentry))
2589 goto out4;
2590 error = -EBUSY; 2637 error = -EBUSY;
2591 if (new.mnt == root.mnt || 2638 if (new_mnt == root_mnt || old_mnt == root_mnt)
2592 old.mnt == root.mnt)
2593 goto out4; /* loop, on the same file system */ 2639 goto out4; /* loop, on the same file system */
2594 error = -EINVAL; 2640 error = -EINVAL;
2595 if (root.mnt->mnt_root != root.dentry) 2641 if (root.mnt->mnt_root != root.dentry)
2596 goto out4; /* not a mountpoint */ 2642 goto out4; /* not a mountpoint */
2597 if (!mnt_has_parent(root_mnt)) 2643 if (!mnt_has_parent(root_mnt))
2598 goto out4; /* not attached */ 2644 goto out4; /* not attached */
2645 root_mp = root_mnt->mnt_mp;
2599 if (new.mnt->mnt_root != new.dentry) 2646 if (new.mnt->mnt_root != new.dentry)
2600 goto out4; /* not a mountpoint */ 2647 goto out4; /* not a mountpoint */
2601 if (!mnt_has_parent(new_mnt)) 2648 if (!mnt_has_parent(new_mnt))
2602 goto out4; /* not attached */ 2649 goto out4; /* not attached */
2603 /* make sure we can reach put_old from new_root */ 2650 /* make sure we can reach put_old from new_root */
2604 if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) 2651 if (!is_path_reachable(old_mnt, old.dentry, &new))
2605 goto out4; 2652 goto out4;
2653 root_mp->m_count++; /* pin it so it won't go away */
2606 br_write_lock(&vfsmount_lock); 2654 br_write_lock(&vfsmount_lock);
2607 detach_mnt(new_mnt, &parent_path); 2655 detach_mnt(new_mnt, &parent_path);
2608 detach_mnt(root_mnt, &root_parent); 2656 detach_mnt(root_mnt, &root_parent);
2609 /* mount old root on put_old */ 2657 /* mount old root on put_old */
2610 attach_mnt(root_mnt, &old); 2658 attach_mnt(root_mnt, old_mnt, old_mp);
2611 /* mount new_root on / */ 2659 /* mount new_root on / */
2612 attach_mnt(new_mnt, &root_parent); 2660 attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
2613 touch_mnt_namespace(current->nsproxy->mnt_ns); 2661 touch_mnt_namespace(current->nsproxy->mnt_ns);
2614 br_write_unlock(&vfsmount_lock); 2662 br_write_unlock(&vfsmount_lock);
2615 chroot_fs_refs(&root, &new); 2663 chroot_fs_refs(&root, &new);
2664 put_mountpoint(root_mp);
2616 error = 0; 2665 error = 0;
2617out4: 2666out4:
2618 unlock_mount(&old); 2667 unlock_mount(old_mp);
2619 if (!error) { 2668 if (!error) {
2620 path_put(&root_parent); 2669 path_put(&root_parent);
2621 path_put(&parent_path); 2670 path_put(&parent_path);
@@ -2670,14 +2719,17 @@ void __init mnt_init(void)
2670 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 2719 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2671 2720
2672 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); 2721 mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
2722 mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
2673 2723
2674 if (!mount_hashtable) 2724 if (!mount_hashtable || !mountpoint_hashtable)
2675 panic("Failed to allocate mount hash table\n"); 2725 panic("Failed to allocate mount hash table\n");
2676 2726
2677 printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); 2727 printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
2678 2728
2679 for (u = 0; u < HASH_SIZE; u++) 2729 for (u = 0; u < HASH_SIZE; u++)
2680 INIT_LIST_HEAD(&mount_hashtable[u]); 2730 INIT_LIST_HEAD(&mount_hashtable[u]);
2731 for (u = 0; u < HASH_SIZE; u++)
2732 INIT_LIST_HEAD(&mountpoint_hashtable[u]);
2681 2733
2682 br_lock_init(&vfsmount_lock); 2734 br_lock_init(&vfsmount_lock);
2683 2735
@@ -2694,16 +2746,13 @@ void __init mnt_init(void)
2694 2746
2695void put_mnt_ns(struct mnt_namespace *ns) 2747void put_mnt_ns(struct mnt_namespace *ns)
2696{ 2748{
2697 LIST_HEAD(umount_list);
2698
2699 if (!atomic_dec_and_test(&ns->count)) 2749 if (!atomic_dec_and_test(&ns->count))
2700 return; 2750 return;
2701 down_write(&namespace_sem); 2751 namespace_lock();
2702 br_write_lock(&vfsmount_lock); 2752 br_write_lock(&vfsmount_lock);
2703 umount_tree(ns->root, 0, &umount_list); 2753 umount_tree(ns->root, 0);
2704 br_write_unlock(&vfsmount_lock); 2754 br_write_unlock(&vfsmount_lock);
2705 up_write(&namespace_sem); 2755 namespace_unlock();
2706 release_mounts(&umount_list);
2707 free_mnt_ns(ns); 2756 free_mnt_ns(ns);
2708} 2757}
2709 2758
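The namespace.c changes above hang mounts off a shared, refcounted `struct mountpoint` keyed by the dentry being mounted on: new_mountpoint() looks it up in mountpoint_hashtable (initialised in mnt_init()), put_mountpoint() drops a reference, and pivot_root() pins one via m_count. A small userspace sketch of a hashed, refcounted keyed object of that shape; hash size, field names and the single-threaded setting are illustrative assumptions, not the kernel code:

#include <stdio.h>
#include <stdlib.h>

#define HASH_SIZE 16

struct mp {
    struct mp *next;
    const void *key;    /* stands in for the dentry being mounted on */
    int count;          /* how many users sit on this mountpoint     */
};

static struct mp *hashtable[HASH_SIZE];

static unsigned int hash_key(const void *key)
{
    return ((unsigned long)key >> 4) % HASH_SIZE;
}

/* Analogue of new_mountpoint(): reuse an existing entry for this key if
 * there is one, otherwise allocate and insert a fresh entry. */
static struct mp *get_mp(const void *key)
{
    unsigned int h = hash_key(key);
    struct mp *m;

    for (m = hashtable[h]; m; m = m->next) {
        if (m->key == key) {
            m->count++;
            return m;
        }
    }
    m = malloc(sizeof(*m));
    if (!m)
        return NULL;
    m->key = key;
    m->count = 1;
    m->next = hashtable[h];
    hashtable[h] = m;
    return m;
}

/* Analogue of put_mountpoint(): drop one reference, unhash and free on the
 * last one. */
static void put_mp(struct mp *m)
{
    if (--m->count == 0) {
        struct mp **p = &hashtable[hash_key(m->key)];

        while (*p != m)
            p = &(*p)->next;
        *p = m->next;
        free(m);
    }
}

int main(void)
{
    int key;
    struct mp *a = get_mp(&key);
    struct mp *b = get_mp(&key);  /* same key: same object, count == 2 */

    printf("shared: %s, count %d\n", a == b ? "yes" : "no", a->count);
    put_mp(b);
    put_mp(a);
    return 0;
}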
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f33455b4d957..5bee0313dffd 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -177,7 +177,7 @@ static int export_features_open(struct inode *inode, struct file *file)
177 return single_open(file, export_features_show, NULL); 177 return single_open(file, export_features_show, NULL);
178} 178}
179 179
180static struct file_operations export_features_operations = { 180static const struct file_operations export_features_operations = {
181 .open = export_features_open, 181 .open = export_features_open,
182 .read = seq_read, 182 .read = seq_read,
183 .llseek = seq_lseek, 183 .llseek = seq_lseek,
@@ -196,7 +196,7 @@ static int supported_enctypes_open(struct inode *inode, struct file *file)
196 return single_open(file, supported_enctypes_show, NULL); 196 return single_open(file, supported_enctypes_show, NULL);
197} 197}
198 198
199static struct file_operations supported_enctypes_ops = { 199static const struct file_operations supported_enctypes_ops = {
200 .open = supported_enctypes_open, 200 .open = supported_enctypes_open,
201 .read = seq_read, 201 .read = seq_read,
202 .llseek = seq_lseek, 202 .llseek = seq_lseek,
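The two nfsd tables above are only constified, but the effect is worth a tiny illustration: a const table of function pointers is placed in read-only data and cannot be patched at run time by accident. Illustrative code, not taken from nfsd:

#include <stdio.h>

struct ops {
    int (*open)(void);
    int (*read)(void);
};

static int my_open(void) { return 0; }
static int my_read(void) { return 42; }

static const struct ops export_ops = {
    .open = my_open,
    .read = my_read,
};

int main(void)
{
    /* export_ops.read = NULL; -- would now fail to compile */
    printf("%d\n", export_ops.read());
    return 0;
}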
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index c616a70e8cf9..959815c1e017 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -287,9 +287,6 @@ static int inotify_release(struct inode *ignored, struct file *file)
287 287
288 pr_debug("%s: group=%p\n", __func__, group); 288 pr_debug("%s: group=%p\n", __func__, group);
289 289
290 if (file->f_flags & FASYNC)
291 fsnotify_fasync(-1, file, 0);
292
293 /* free this group, matching get was inotify_init->fsnotify_obtain_group */ 290 /* free this group, matching get was inotify_init->fsnotify_obtain_group */
294 fsnotify_destroy_group(group); 291 fsnotify_destroy_group(group);
295 292
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5b2d4f0853ac..1da4b81e6f76 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2129,7 +2129,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2129 2129
2130 BUG_ON(iocb->ki_pos != pos); 2130 BUG_ON(iocb->ki_pos != pos);
2131 2131
2132 sb_start_write(inode->i_sb);
2133 mutex_lock(&inode->i_mutex); 2132 mutex_lock(&inode->i_mutex);
2134 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); 2133 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2135 mutex_unlock(&inode->i_mutex); 2134 mutex_unlock(&inode->i_mutex);
@@ -2138,7 +2137,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2138 if (err < 0) 2137 if (err < 0)
2139 ret = err; 2138 ret = err;
2140 } 2139 }
2141 sb_end_write(inode->i_sb);
2142 return ret; 2140 return ret;
2143} 2141}
2144 2142
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6474cb44004d..8a7509f9e6f5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2248,8 +2248,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2248 if (iocb->ki_left == 0) 2248 if (iocb->ki_left == 0)
2249 return 0; 2249 return 0;
2250 2250
2251 sb_start_write(inode->i_sb);
2252
2253 appending = file->f_flags & O_APPEND ? 1 : 0; 2251 appending = file->f_flags & O_APPEND ? 1 : 0;
2254 direct_io = file->f_flags & O_DIRECT ? 1 : 0; 2252 direct_io = file->f_flags & O_DIRECT ? 1 : 0;
2255 2253
@@ -2423,7 +2421,6 @@ out_sems:
2423 ocfs2_iocb_clear_sem_locked(iocb); 2421 ocfs2_iocb_clear_sem_locked(iocb);
2424 2422
2425 mutex_unlock(&inode->i_mutex); 2423 mutex_unlock(&inode->i_mutex);
2426 sb_end_write(inode->i_sb);
2427 2424
2428 if (written) 2425 if (written)
2429 ret = written; 2426 ret = written;
@@ -2468,8 +2465,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
2468 out->f_path.dentry->d_name.len, 2465 out->f_path.dentry->d_name.len,
2469 out->f_path.dentry->d_name.name, len); 2466 out->f_path.dentry->d_name.name, len);
2470 2467
2471 if (pipe->inode) 2468 pipe_lock(pipe);
2472 mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
2473 2469
2474 splice_from_pipe_begin(&sd); 2470 splice_from_pipe_begin(&sd);
2475 do { 2471 do {
@@ -2489,8 +2485,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
2489 } while (ret > 0); 2485 } while (ret > 0);
2490 splice_from_pipe_end(pipe, &sd); 2486 splice_from_pipe_end(pipe, &sd);
2491 2487
2492 if (pipe->inode) 2488 pipe_unlock(pipe);
2493 mutex_unlock(&pipe->inode->i_mutex);
2494 2489
2495 if (sd.num_spliced) 2490 if (sd.num_spliced)
2496 ret = sd.num_spliced; 2491 ret = sd.num_spliced;
diff --git a/fs/pipe.c b/fs/pipe.c
index 2234f3f61f8d..a029a14bacf1 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -25,6 +25,8 @@
25#include <asm/uaccess.h> 25#include <asm/uaccess.h>
26#include <asm/ioctls.h> 26#include <asm/ioctls.h>
27 27
28#include "internal.h"
29
28/* 30/*
29 * The max size that a non-root user is allowed to grow the pipe. Can 31 * The max size that a non-root user is allowed to grow the pipe. Can
30 * be set by root in /proc/sys/fs/pipe-max-size 32 * be set by root in /proc/sys/fs/pipe-max-size
@@ -53,8 +55,8 @@ unsigned int pipe_min_size = PAGE_SIZE;
53 55
54static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) 56static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
55{ 57{
56 if (pipe->inode) 58 if (pipe->files)
57 mutex_lock_nested(&pipe->inode->i_mutex, subclass); 59 mutex_lock_nested(&pipe->mutex, subclass);
58} 60}
59 61
60void pipe_lock(struct pipe_inode_info *pipe) 62void pipe_lock(struct pipe_inode_info *pipe)
@@ -68,11 +70,21 @@ EXPORT_SYMBOL(pipe_lock);
68 70
69void pipe_unlock(struct pipe_inode_info *pipe) 71void pipe_unlock(struct pipe_inode_info *pipe)
70{ 72{
71 if (pipe->inode) 73 if (pipe->files)
72 mutex_unlock(&pipe->inode->i_mutex); 74 mutex_unlock(&pipe->mutex);
73} 75}
74EXPORT_SYMBOL(pipe_unlock); 76EXPORT_SYMBOL(pipe_unlock);
75 77
78static inline void __pipe_lock(struct pipe_inode_info *pipe)
79{
80 mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
81}
82
83static inline void __pipe_unlock(struct pipe_inode_info *pipe)
84{
85 mutex_unlock(&pipe->mutex);
86}
87
76void pipe_double_lock(struct pipe_inode_info *pipe1, 88void pipe_double_lock(struct pipe_inode_info *pipe1,
77 struct pipe_inode_info *pipe2) 89 struct pipe_inode_info *pipe2)
78{ 90{
@@ -361,8 +373,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
361 unsigned long nr_segs, loff_t pos) 373 unsigned long nr_segs, loff_t pos)
362{ 374{
363 struct file *filp = iocb->ki_filp; 375 struct file *filp = iocb->ki_filp;
364 struct inode *inode = file_inode(filp); 376 struct pipe_inode_info *pipe = filp->private_data;
365 struct pipe_inode_info *pipe;
366 int do_wakeup; 377 int do_wakeup;
367 ssize_t ret; 378 ssize_t ret;
368 struct iovec *iov = (struct iovec *)_iov; 379 struct iovec *iov = (struct iovec *)_iov;
@@ -375,8 +386,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
375 386
376 do_wakeup = 0; 387 do_wakeup = 0;
377 ret = 0; 388 ret = 0;
378 mutex_lock(&inode->i_mutex); 389 __pipe_lock(pipe);
379 pipe = inode->i_pipe;
380 for (;;) { 390 for (;;) {
381 int bufs = pipe->nrbufs; 391 int bufs = pipe->nrbufs;
382 if (bufs) { 392 if (bufs) {
@@ -464,7 +474,7 @@ redo:
464 } 474 }
465 pipe_wait(pipe); 475 pipe_wait(pipe);
466 } 476 }
467 mutex_unlock(&inode->i_mutex); 477 __pipe_unlock(pipe);
468 478
469 /* Signal writers asynchronously that there is more room. */ 479 /* Signal writers asynchronously that there is more room. */
470 if (do_wakeup) { 480 if (do_wakeup) {
@@ -486,8 +496,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
486 unsigned long nr_segs, loff_t ppos) 496 unsigned long nr_segs, loff_t ppos)
487{ 497{
488 struct file *filp = iocb->ki_filp; 498 struct file *filp = iocb->ki_filp;
489 struct inode *inode = file_inode(filp); 499 struct pipe_inode_info *pipe = filp->private_data;
490 struct pipe_inode_info *pipe;
491 ssize_t ret; 500 ssize_t ret;
492 int do_wakeup; 501 int do_wakeup;
493 struct iovec *iov = (struct iovec *)_iov; 502 struct iovec *iov = (struct iovec *)_iov;
@@ -501,8 +510,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
501 510
502 do_wakeup = 0; 511 do_wakeup = 0;
503 ret = 0; 512 ret = 0;
504 mutex_lock(&inode->i_mutex); 513 __pipe_lock(pipe);
505 pipe = inode->i_pipe;
506 514
507 if (!pipe->readers) { 515 if (!pipe->readers) {
508 send_sig(SIGPIPE, current, 0); 516 send_sig(SIGPIPE, current, 0);
@@ -649,7 +657,7 @@ redo2:
649 pipe->waiting_writers--; 657 pipe->waiting_writers--;
650 } 658 }
651out: 659out:
652 mutex_unlock(&inode->i_mutex); 660 __pipe_unlock(pipe);
653 if (do_wakeup) { 661 if (do_wakeup) {
654 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); 662 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
655 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 663 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
@@ -662,29 +670,14 @@ out:
662 return ret; 670 return ret;
663} 671}
664 672
665static ssize_t
666bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
667{
668 return -EBADF;
669}
670
671static ssize_t
672bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
673 loff_t *ppos)
674{
675 return -EBADF;
676}
677
678static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 673static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
679{ 674{
680 struct inode *inode = file_inode(filp); 675 struct pipe_inode_info *pipe = filp->private_data;
681 struct pipe_inode_info *pipe;
682 int count, buf, nrbufs; 676 int count, buf, nrbufs;
683 677
684 switch (cmd) { 678 switch (cmd) {
685 case FIONREAD: 679 case FIONREAD:
686 mutex_lock(&inode->i_mutex); 680 __pipe_lock(pipe);
687 pipe = inode->i_pipe;
688 count = 0; 681 count = 0;
689 buf = pipe->curbuf; 682 buf = pipe->curbuf;
690 nrbufs = pipe->nrbufs; 683 nrbufs = pipe->nrbufs;
@@ -692,7 +685,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
692 count += pipe->bufs[buf].len; 685 count += pipe->bufs[buf].len;
693 buf = (buf+1) & (pipe->buffers - 1); 686 buf = (buf+1) & (pipe->buffers - 1);
694 } 687 }
695 mutex_unlock(&inode->i_mutex); 688 __pipe_unlock(pipe);
696 689
697 return put_user(count, (int __user *)arg); 690 return put_user(count, (int __user *)arg);
698 default: 691 default:
@@ -705,8 +698,7 @@ static unsigned int
705pipe_poll(struct file *filp, poll_table *wait) 698pipe_poll(struct file *filp, poll_table *wait)
706{ 699{
707 unsigned int mask; 700 unsigned int mask;
708 struct inode *inode = file_inode(filp); 701 struct pipe_inode_info *pipe = filp->private_data;
709 struct pipe_inode_info *pipe = inode->i_pipe;
710 int nrbufs; 702 int nrbufs;
711 703
712 poll_wait(filp, &pipe->wait, wait); 704 poll_wait(filp, &pipe->wait, wait);
@@ -734,197 +726,56 @@ pipe_poll(struct file *filp, poll_table *wait)
734} 726}
735 727
736static int 728static int
737pipe_release(struct inode *inode, int decr, int decw) 729pipe_release(struct inode *inode, struct file *file)
738{ 730{
739 struct pipe_inode_info *pipe; 731 struct pipe_inode_info *pipe = inode->i_pipe;
732 int kill = 0;
740 733
741 mutex_lock(&inode->i_mutex); 734 __pipe_lock(pipe);
742 pipe = inode->i_pipe; 735 if (file->f_mode & FMODE_READ)
743 pipe->readers -= decr; 736 pipe->readers--;
744 pipe->writers -= decw; 737 if (file->f_mode & FMODE_WRITE)
738 pipe->writers--;
745 739
746 if (!pipe->readers && !pipe->writers) { 740 if (pipe->readers || pipe->writers) {
747 free_pipe_info(inode);
748 } else {
749 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP); 741 wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP);
750 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 742 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
751 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 743 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
752 } 744 }
753 mutex_unlock(&inode->i_mutex); 745 spin_lock(&inode->i_lock);
754 746 if (!--pipe->files) {
755 return 0; 747 inode->i_pipe = NULL;
756} 748 kill = 1;
757 749 }
758static int 750 spin_unlock(&inode->i_lock);
759pipe_read_fasync(int fd, struct file *filp, int on) 751 __pipe_unlock(pipe);
760{
761 struct inode *inode = file_inode(filp);
762 int retval;
763
764 mutex_lock(&inode->i_mutex);
765 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
766 mutex_unlock(&inode->i_mutex);
767
768 return retval;
769}
770
771
772static int
773pipe_write_fasync(int fd, struct file *filp, int on)
774{
775 struct inode *inode = file_inode(filp);
776 int retval;
777 752
778 mutex_lock(&inode->i_mutex); 753 if (kill)
779 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers); 754 free_pipe_info(pipe);
780 mutex_unlock(&inode->i_mutex);
781 755
782 return retval; 756 return 0;
783} 757}
784 758
785
786static int 759static int
787pipe_rdwr_fasync(int fd, struct file *filp, int on) 760pipe_fasync(int fd, struct file *filp, int on)
788{ 761{
789 struct inode *inode = file_inode(filp); 762 struct pipe_inode_info *pipe = filp->private_data;
790 struct pipe_inode_info *pipe = inode->i_pipe; 763 int retval = 0;
791 int retval;
792 764
793 mutex_lock(&inode->i_mutex); 765 __pipe_lock(pipe);
794 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); 766 if (filp->f_mode & FMODE_READ)
795 if (retval >= 0) { 767 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
768 if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
796 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); 769 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
797 if (retval < 0) /* this can happen only if on == T */ 770 if (retval < 0 && (filp->f_mode & FMODE_READ))
771 /* this can happen only if on == T */
798 fasync_helper(-1, filp, 0, &pipe->fasync_readers); 772 fasync_helper(-1, filp, 0, &pipe->fasync_readers);
799 } 773 }
800 mutex_unlock(&inode->i_mutex); 774 __pipe_unlock(pipe);
801 return retval; 775 return retval;
802} 776}
803 777
804 778struct pipe_inode_info *alloc_pipe_info(void)
805static int
806pipe_read_release(struct inode *inode, struct file *filp)
807{
808 return pipe_release(inode, 1, 0);
809}
810
811static int
812pipe_write_release(struct inode *inode, struct file *filp)
813{
814 return pipe_release(inode, 0, 1);
815}
816
817static int
818pipe_rdwr_release(struct inode *inode, struct file *filp)
819{
820 int decr, decw;
821
822 decr = (filp->f_mode & FMODE_READ) != 0;
823 decw = (filp->f_mode & FMODE_WRITE) != 0;
824 return pipe_release(inode, decr, decw);
825}
826
827static int
828pipe_read_open(struct inode *inode, struct file *filp)
829{
830 int ret = -ENOENT;
831
832 mutex_lock(&inode->i_mutex);
833
834 if (inode->i_pipe) {
835 ret = 0;
836 inode->i_pipe->readers++;
837 }
838
839 mutex_unlock(&inode->i_mutex);
840
841 return ret;
842}
843
844static int
845pipe_write_open(struct inode *inode, struct file *filp)
846{
847 int ret = -ENOENT;
848
849 mutex_lock(&inode->i_mutex);
850
851 if (inode->i_pipe) {
852 ret = 0;
853 inode->i_pipe->writers++;
854 }
855
856 mutex_unlock(&inode->i_mutex);
857
858 return ret;
859}
860
861static int
862pipe_rdwr_open(struct inode *inode, struct file *filp)
863{
864 int ret = -ENOENT;
865
866 if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
867 return -EINVAL;
868
869 mutex_lock(&inode->i_mutex);
870
871 if (inode->i_pipe) {
872 ret = 0;
873 if (filp->f_mode & FMODE_READ)
874 inode->i_pipe->readers++;
875 if (filp->f_mode & FMODE_WRITE)
876 inode->i_pipe->writers++;
877 }
878
879 mutex_unlock(&inode->i_mutex);
880
881 return ret;
882}
883
884/*
885 * The file_operations structs are not static because they
886 * are also used in linux/fs/fifo.c to do operations on FIFOs.
887 *
888 * Pipes reuse fifos' file_operations structs.
889 */
890const struct file_operations read_pipefifo_fops = {
891 .llseek = no_llseek,
892 .read = do_sync_read,
893 .aio_read = pipe_read,
894 .write = bad_pipe_w,
895 .poll = pipe_poll,
896 .unlocked_ioctl = pipe_ioctl,
897 .open = pipe_read_open,
898 .release = pipe_read_release,
899 .fasync = pipe_read_fasync,
900};
901
902const struct file_operations write_pipefifo_fops = {
903 .llseek = no_llseek,
904 .read = bad_pipe_r,
905 .write = do_sync_write,
906 .aio_write = pipe_write,
907 .poll = pipe_poll,
908 .unlocked_ioctl = pipe_ioctl,
909 .open = pipe_write_open,
910 .release = pipe_write_release,
911 .fasync = pipe_write_fasync,
912};
913
914const struct file_operations rdwr_pipefifo_fops = {
915 .llseek = no_llseek,
916 .read = do_sync_read,
917 .aio_read = pipe_read,
918 .write = do_sync_write,
919 .aio_write = pipe_write,
920 .poll = pipe_poll,
921 .unlocked_ioctl = pipe_ioctl,
922 .open = pipe_rdwr_open,
923 .release = pipe_rdwr_release,
924 .fasync = pipe_rdwr_fasync,
925};
926
927struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
928{ 779{
929 struct pipe_inode_info *pipe; 780 struct pipe_inode_info *pipe;
930 781
@@ -934,8 +785,8 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
934 if (pipe->bufs) { 785 if (pipe->bufs) {
935 init_waitqueue_head(&pipe->wait); 786 init_waitqueue_head(&pipe->wait);
936 pipe->r_counter = pipe->w_counter = 1; 787 pipe->r_counter = pipe->w_counter = 1;
937 pipe->inode = inode;
938 pipe->buffers = PIPE_DEF_BUFFERS; 788 pipe->buffers = PIPE_DEF_BUFFERS;
789 mutex_init(&pipe->mutex);
939 return pipe; 790 return pipe;
940 } 791 }
941 kfree(pipe); 792 kfree(pipe);
@@ -944,7 +795,7 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
944 return NULL; 795 return NULL;
945} 796}
946 797
947void __free_pipe_info(struct pipe_inode_info *pipe) 798void free_pipe_info(struct pipe_inode_info *pipe)
948{ 799{
949 int i; 800 int i;
950 801
@@ -959,12 +810,6 @@ void __free_pipe_info(struct pipe_inode_info *pipe)
959 kfree(pipe); 810 kfree(pipe);
960} 811}
961 812
962void free_pipe_info(struct inode *inode)
963{
964 __free_pipe_info(inode->i_pipe);
965 inode->i_pipe = NULL;
966}
967
968static struct vfsmount *pipe_mnt __read_mostly; 813static struct vfsmount *pipe_mnt __read_mostly;
969 814
970/* 815/*
@@ -990,13 +835,14 @@ static struct inode * get_pipe_inode(void)
990 835
991 inode->i_ino = get_next_ino(); 836 inode->i_ino = get_next_ino();
992 837
993 pipe = alloc_pipe_info(inode); 838 pipe = alloc_pipe_info();
994 if (!pipe) 839 if (!pipe)
995 goto fail_iput; 840 goto fail_iput;
996 inode->i_pipe = pipe;
997 841
842 inode->i_pipe = pipe;
843 pipe->files = 2;
998 pipe->readers = pipe->writers = 1; 844 pipe->readers = pipe->writers = 1;
999 inode->i_fop = &rdwr_pipefifo_fops; 845 inode->i_fop = &pipefifo_fops;
1000 846
1001 /* 847 /*
1002 * Mark the inode dirty from the very beginning, 848 * Mark the inode dirty from the very beginning,
@@ -1039,17 +885,19 @@ int create_pipe_files(struct file **res, int flags)
1039 d_instantiate(path.dentry, inode); 885 d_instantiate(path.dentry, inode);
1040 886
1041 err = -ENFILE; 887 err = -ENFILE;
1042 f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); 888 f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops);
1043 if (IS_ERR(f)) 889 if (IS_ERR(f))
1044 goto err_dentry; 890 goto err_dentry;
1045 891
1046 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); 892 f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
893 f->private_data = inode->i_pipe;
1047 894
1048 res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops); 895 res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops);
1049 if (IS_ERR(res[0])) 896 if (IS_ERR(res[0]))
1050 goto err_file; 897 goto err_file;
1051 898
1052 path_get(&path); 899 path_get(&path);
900 res[0]->private_data = inode->i_pipe;
1053 res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); 901 res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1054 res[1] = f; 902 res[1] = f;
1055 return 0; 903 return 0;
@@ -1057,12 +905,12 @@ int create_pipe_files(struct file **res, int flags)
1057err_file: 905err_file:
1058 put_filp(f); 906 put_filp(f);
1059err_dentry: 907err_dentry:
1060 free_pipe_info(inode); 908 free_pipe_info(inode->i_pipe);
1061 path_put(&path); 909 path_put(&path);
1062 return err; 910 return err;
1063 911
1064err_inode: 912err_inode:
1065 free_pipe_info(inode); 913 free_pipe_info(inode->i_pipe);
1066 iput(inode); 914 iput(inode);
1067 return err; 915 return err;
1068} 916}
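With the pipe rework above, both ends of a pipe reference one pipe_inode_info through file->private_data, and the new pipe->files count decides when the last close may free it (see pipe_release() and, below, the fifo_open() error path). A hedged userspace analogue of that shared-refcount teardown, single-threaded and with made-up names:

#include <stdio.h>
#include <stdlib.h>

struct pipe_state {
    int files;      /* how many file ends still reference us */
    int readers;
    int writers;
};

struct file_end {
    struct pipe_state *pipe;   /* plays the role of file->private_data */
    int is_reader;
};

static struct pipe_state *pipe_state_new(void)
{
    struct pipe_state *p = calloc(1, sizeof(*p));

    if (!p)
        return NULL;
    p->files = 2;
    p->readers = p->writers = 1;
    return p;
}

/* Analogue of pipe_release(): adjust readers/writers, then free the shared
 * state only when the last file end goes away. */
static void file_end_close(struct file_end *f)
{
    struct pipe_state *p = f->pipe;

    if (f->is_reader)
        p->readers--;
    else
        p->writers--;

    if (--p->files == 0) {
        printf("last end closed, freeing pipe state\n");
        free(p);
    }
    free(f);
}

int main(void)
{
    struct pipe_state *p = pipe_state_new();
    struct file_end *r = malloc(sizeof(*r));
    struct file_end *w = malloc(sizeof(*w));

    if (!p || !r || !w)
        return 1;
    r->pipe = p; r->is_reader = 1;
    w->pipe = p; w->is_reader = 0;

    file_end_close(w);   /* state survives: reader still open */
    file_end_close(r);   /* last close frees it               */
    return 0;
}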
@@ -1144,6 +992,168 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
1144 return sys_pipe2(fildes, 0); 992 return sys_pipe2(fildes, 0);
1145} 993}
1146 994
995static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
996{
997 int cur = *cnt;
998
999 while (cur == *cnt) {
1000 pipe_wait(pipe);
1001 if (signal_pending(current))
1002 break;
1003 }
1004 return cur == *cnt ? -ERESTARTSYS : 0;
1005}
1006
1007static void wake_up_partner(struct pipe_inode_info *pipe)
1008{
1009 wake_up_interruptible(&pipe->wait);
1010}
1011
1012static int fifo_open(struct inode *inode, struct file *filp)
1013{
1014 struct pipe_inode_info *pipe;
1015 bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
1016 int kill = 0;
1017 int ret;
1018
1019 filp->f_version = 0;
1020
1021 spin_lock(&inode->i_lock);
1022 if (inode->i_pipe) {
1023 pipe = inode->i_pipe;
1024 pipe->files++;
1025 spin_unlock(&inode->i_lock);
1026 } else {
1027 spin_unlock(&inode->i_lock);
1028 pipe = alloc_pipe_info();
1029 if (!pipe)
1030 return -ENOMEM;
1031 pipe->files = 1;
1032 spin_lock(&inode->i_lock);
1033 if (unlikely(inode->i_pipe)) {
1034 inode->i_pipe->files++;
1035 spin_unlock(&inode->i_lock);
1036 free_pipe_info(pipe);
1037 pipe = inode->i_pipe;
1038 } else {
1039 inode->i_pipe = pipe;
1040 spin_unlock(&inode->i_lock);
1041 }
1042 }
1043 filp->private_data = pipe;
1044 /* OK, we have a pipe and it's pinned down */
1045
1046 __pipe_lock(pipe);
1047
1048 /* We can only do regular read/write on fifos */
1049 filp->f_mode &= (FMODE_READ | FMODE_WRITE);
1050
1051 switch (filp->f_mode) {
1052 case FMODE_READ:
1053 /*
1054 * O_RDONLY
1055 * POSIX.1 says that O_NONBLOCK means return with the FIFO
1056 * opened, even when there is no process writing the FIFO.
1057 */
1058 pipe->r_counter++;
1059 if (pipe->readers++ == 0)
1060 wake_up_partner(pipe);
1061
1062 if (!is_pipe && !pipe->writers) {
1063 if ((filp->f_flags & O_NONBLOCK)) {
1064 /* suppress POLLHUP until we have
1065 * seen a writer */
1066 filp->f_version = pipe->w_counter;
1067 } else {
1068 if (wait_for_partner(pipe, &pipe->w_counter))
1069 goto err_rd;
1070 }
1071 }
1072 break;
1073
1074 case FMODE_WRITE:
1075 /*
1076 * O_WRONLY
1077 * POSIX.1 says that O_NONBLOCK means return -1 with
1078 * errno=ENXIO when there is no process reading the FIFO.
1079 */
1080 ret = -ENXIO;
1081 if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
1082 goto err;
1083
1084 pipe->w_counter++;
1085 if (!pipe->writers++)
1086 wake_up_partner(pipe);
1087
1088 if (!is_pipe && !pipe->readers) {
1089 if (wait_for_partner(pipe, &pipe->r_counter))
1090 goto err_wr;
1091 }
1092 break;
1093
1094 case FMODE_READ | FMODE_WRITE:
1095 /*
1096 * O_RDWR
1097 * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
1098 * This implementation will NEVER block on a O_RDWR open, since
1099 * the process can at least talk to itself.
1100 */
1101
1102 pipe->readers++;
1103 pipe->writers++;
1104 pipe->r_counter++;
1105 pipe->w_counter++;
1106 if (pipe->readers == 1 || pipe->writers == 1)
1107 wake_up_partner(pipe);
1108 break;
1109
1110 default:
1111 ret = -EINVAL;
1112 goto err;
1113 }
1114
1115 /* Ok! */
1116 __pipe_unlock(pipe);
1117 return 0;
1118
1119err_rd:
1120 if (!--pipe->readers)
1121 wake_up_interruptible(&pipe->wait);
1122 ret = -ERESTARTSYS;
1123 goto err;
1124
1125err_wr:
1126 if (!--pipe->writers)
1127 wake_up_interruptible(&pipe->wait);
1128 ret = -ERESTARTSYS;
1129 goto err;
1130
1131err:
1132 spin_lock(&inode->i_lock);
1133 if (!--pipe->files) {
1134 inode->i_pipe = NULL;
1135 kill = 1;
1136 }
1137 spin_unlock(&inode->i_lock);
1138 __pipe_unlock(pipe);
1139 if (kill)
1140 free_pipe_info(pipe);
1141 return ret;
1142}
1143
1144const struct file_operations pipefifo_fops = {
1145 .open = fifo_open,
1146 .llseek = no_llseek,
1147 .read = do_sync_read,
1148 .aio_read = pipe_read,
1149 .write = do_sync_write,
1150 .aio_write = pipe_write,
1151 .poll = pipe_poll,
1152 .unlocked_ioctl = pipe_ioctl,
1153 .release = pipe_release,
1154 .fasync = pipe_fasync,
1155};
1156
1147/* 1157/*
1148 * Allocate a new array of pipe buffers and copy the info over. Returns the 1158 * Allocate a new array of pipe buffers and copy the info over. Returns the
1149 * pipe size if successful, or return -ERROR on error. 1159 * pipe size if successful, or return -ERROR on error.
@@ -1229,9 +1239,7 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
1229 */ 1239 */
1230struct pipe_inode_info *get_pipe_info(struct file *file) 1240struct pipe_inode_info *get_pipe_info(struct file *file)
1231{ 1241{
1232 struct inode *i = file_inode(file); 1242 return file->f_op == &pipefifo_fops ? file->private_data : NULL;
1233
1234 return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
1235} 1243}
1236 1244
1237long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) 1245long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -1243,7 +1251,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1243 if (!pipe) 1251 if (!pipe)
1244 return -EBADF; 1252 return -EBADF;
1245 1253
1246 mutex_lock(&pipe->inode->i_mutex); 1254 __pipe_lock(pipe);
1247 1255
1248 switch (cmd) { 1256 switch (cmd) {
1249 case F_SETPIPE_SZ: { 1257 case F_SETPIPE_SZ: {
@@ -1272,7 +1280,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
1272 } 1280 }
1273 1281
1274out: 1282out:
1275 mutex_unlock(&pipe->inode->i_mutex); 1283 __pipe_unlock(pipe);
1276 return ret; 1284 return ret;
1277} 1285}
1278 1286
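The open rules that fifo_open() implements above can be observed from userspace. The program below exercises the three cases its comments quote from POSIX.1: a non-blocking read-only open succeeds with no writer, a non-blocking write-only open fails with ENXIO when there is no reader, and an O_RDWR open never blocks in this implementation (POSIX leaves that case undefined). The fifo path is arbitrary:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
    const char *path = "/tmp/fifo_open_demo";
    int fd;

    unlink(path);
    if (mkfifo(path, 0600) < 0) {
        perror("mkfifo");
        return 1;
    }

    /* O_WRONLY|O_NONBLOCK with no reader: must fail with ENXIO. */
    fd = open(path, O_WRONLY | O_NONBLOCK);
    printf("write-only, no reader: fd=%d errno=%s\n",
           fd, fd < 0 ? strerror(errno) : "-");
    if (fd >= 0)
        close(fd);

    /* O_RDONLY|O_NONBLOCK with no writer: returns immediately. */
    fd = open(path, O_RDONLY | O_NONBLOCK);
    printf("read-only, no writer:  fd=%d\n", fd);
    if (fd >= 0)
        close(fd);

    /* O_RDWR: does not block, the opener can talk to itself. */
    fd = open(path, O_RDWR);
    printf("read-write:            fd=%d\n", fd);
    if (fd >= 0)
        close(fd);

    unlink(path);
    return 0;
}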
diff --git a/fs/pnode.c b/fs/pnode.c
index 8b29d2164da6..3d2a7141b87a 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -218,7 +218,7 @@ static struct mount *get_source(struct mount *dest,
218 * @source_mnt: source mount. 218 * @source_mnt: source mount.
219 * @tree_list : list of heads of trees to be attached. 219 * @tree_list : list of heads of trees to be attached.
220 */ 220 */
221int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, 221int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
222 struct mount *source_mnt, struct list_head *tree_list) 222 struct mount *source_mnt, struct list_head *tree_list)
223{ 223{
224 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; 224 struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
@@ -227,7 +227,6 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
227 struct mount *prev_dest_mnt = dest_mnt; 227 struct mount *prev_dest_mnt = dest_mnt;
228 struct mount *prev_src_mnt = source_mnt; 228 struct mount *prev_src_mnt = source_mnt;
229 LIST_HEAD(tmp_list); 229 LIST_HEAD(tmp_list);
230 LIST_HEAD(umount_list);
231 230
232 for (m = propagation_next(dest_mnt, dest_mnt); m; 231 for (m = propagation_next(dest_mnt, dest_mnt); m;
233 m = propagation_next(m, dest_mnt)) { 232 m = propagation_next(m, dest_mnt)) {
@@ -250,8 +249,8 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry,
250 goto out; 249 goto out;
251 } 250 }
252 251
253 if (is_subdir(dest_dentry, m->mnt.mnt_root)) { 252 if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
254 mnt_set_mountpoint(m, dest_dentry, child); 253 mnt_set_mountpoint(m, dest_mp, child);
255 list_add_tail(&child->mnt_hash, tree_list); 254 list_add_tail(&child->mnt_hash, tree_list);
256 } else { 255 } else {
257 /* 256 /*
@@ -267,10 +266,9 @@ out:
267 br_write_lock(&vfsmount_lock); 266 br_write_lock(&vfsmount_lock);
268 while (!list_empty(&tmp_list)) { 267 while (!list_empty(&tmp_list)) {
269 child = list_first_entry(&tmp_list, struct mount, mnt_hash); 268 child = list_first_entry(&tmp_list, struct mount, mnt_hash);
270 umount_tree(child, 0, &umount_list); 269 umount_tree(child, 0);
271 } 270 }
272 br_write_unlock(&vfsmount_lock); 271 br_write_unlock(&vfsmount_lock);
273 release_mounts(&umount_list);
274 return ret; 272 return ret;
275} 273}
276 274
diff --git a/fs/pnode.h b/fs/pnode.h
index a0493d5ebfbf..b091445c1c4a 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -32,17 +32,16 @@ static inline void set_mnt_shared(struct mount *mnt)
32} 32}
33 33
34void change_mnt_propagation(struct mount *, int); 34void change_mnt_propagation(struct mount *, int);
35int propagate_mnt(struct mount *, struct dentry *, struct mount *, 35int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
36 struct list_head *); 36 struct list_head *);
37int propagate_umount(struct list_head *); 37int propagate_umount(struct list_head *);
38int propagate_mount_busy(struct mount *, int); 38int propagate_mount_busy(struct mount *, int);
39void mnt_release_group_id(struct mount *); 39void mnt_release_group_id(struct mount *);
40int get_dominating_id(struct mount *mnt, const struct path *root); 40int get_dominating_id(struct mount *mnt, const struct path *root);
41unsigned int mnt_get_count(struct mount *mnt); 41unsigned int mnt_get_count(struct mount *mnt);
42void mnt_set_mountpoint(struct mount *, struct dentry *, 42void mnt_set_mountpoint(struct mount *, struct mountpoint *,
43 struct mount *); 43 struct mount *);
44void release_mounts(struct list_head *); 44void umount_tree(struct mount *, int);
45void umount_tree(struct mount *, int, struct list_head *);
46struct mount *copy_tree(struct mount *, struct dentry *, int); 45struct mount *copy_tree(struct mount *, struct dentry *, int);
47bool is_path_reachable(struct mount *, struct dentry *, 46bool is_path_reachable(struct mount *, struct dentry *,
48 const struct path *root); 47 const struct path *root);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3861bcec41ff..dd51e50001fe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -405,6 +405,37 @@ static const struct file_operations proc_lstats_operations = {
405 405
406#endif 406#endif
407 407
408#ifdef CONFIG_CGROUPS
409static int cgroup_open(struct inode *inode, struct file *file)
410{
411 struct pid *pid = PROC_I(inode)->pid;
412 return single_open(file, proc_cgroup_show, pid);
413}
414
415static const struct file_operations proc_cgroup_operations = {
416 .open = cgroup_open,
417 .read = seq_read,
418 .llseek = seq_lseek,
419 .release = single_release,
420};
421#endif
422
423#ifdef CONFIG_PROC_PID_CPUSET
424
425static int cpuset_open(struct inode *inode, struct file *file)
426{
427 struct pid *pid = PROC_I(inode)->pid;
428 return single_open(file, proc_cpuset_show, pid);
429}
430
431static const struct file_operations proc_cpuset_operations = {
432 .open = cpuset_open,
433 .read = seq_read,
434 .llseek = seq_lseek,
435 .release = single_release,
436};
437#endif
438
408static int proc_oom_score(struct task_struct *task, char *buffer) 439static int proc_oom_score(struct task_struct *task, char *buffer)
409{ 440{
410 unsigned long totalpages = totalram_pages + total_swap_pages; 441 unsigned long totalpages = totalram_pages + total_swap_pages;
@@ -1621,6 +1652,15 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
1621 return 0; 1652 return 0;
1622} 1653}
1623 1654
1655int pid_delete_dentry(const struct dentry *dentry)
1656{
1657 /* Is the task we represent dead?
1658 * If so, then don't put the dentry on the lru list,
1659 * kill it immediately.
1660 */
1661 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
1662}
1663
1624const struct dentry_operations pid_dentry_operations = 1664const struct dentry_operations pid_dentry_operations =
1625{ 1665{
1626 .d_revalidate = pid_revalidate, 1666 .d_revalidate = pid_revalidate,
@@ -2893,7 +2933,7 @@ retry:
2893 return iter; 2933 return iter;
2894} 2934}
2895 2935
2896#define TGID_OFFSET (FIRST_PROCESS_ENTRY) 2936#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1)
2897 2937
2898static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 2938static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
2899 struct tgid_iter iter) 2939 struct tgid_iter iter)
@@ -2916,13 +2956,21 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2916 struct tgid_iter iter; 2956 struct tgid_iter iter;
2917 struct pid_namespace *ns; 2957 struct pid_namespace *ns;
2918 filldir_t __filldir; 2958 filldir_t __filldir;
2959 loff_t pos = filp->f_pos;
2919 2960
2920 if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) 2961 if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
2921 goto out; 2962 goto out;
2922 2963
2923 ns = filp->f_dentry->d_sb->s_fs_info; 2964 if (pos == TGID_OFFSET - 1) {
2965 if (proc_fill_cache(filp, dirent, filldir, "self", 4,
2966 NULL, NULL, NULL) < 0)
2967 goto out;
2968 iter.tgid = 0;
2969 } else {
2970 iter.tgid = pos - TGID_OFFSET;
2971 }
2924 iter.task = NULL; 2972 iter.task = NULL;
2925 iter.tgid = filp->f_pos - TGID_OFFSET; 2973 ns = filp->f_dentry->d_sb->s_fs_info;
2926 for (iter = next_tgid(ns, iter); 2974 for (iter = next_tgid(ns, iter);
2927 iter.task; 2975 iter.task;
2928 iter.tgid += 1, iter = next_tgid(ns, iter)) { 2976 iter.tgid += 1, iter = next_tgid(ns, iter)) {
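
The cgroup_open() and cpuset_open() handlers added above are plain single_open() wrappers: the seq_file show routine receives the struct pid recorded in the proc_inode. A handler of the same shape, sketched with hypothetical foo names and assuming the headers base.c already includes (linux/seq_file.h, linux/pid.h), would look roughly like:

static int foo_show(struct seq_file *m, void *v)
{
        struct pid *pid = m->private;   /* stashed by single_open() below */

        seq_printf(m, "pid %d\n", pid_nr(pid));
        return 0;
}

static int foo_open(struct inode *inode, struct file *file)
{
        return single_open(file, foo_show, PROC_I(inode)->pid);
}

static const struct file_operations proc_foo_operations = {
        .open           = foo_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

The only procfs-specific piece is PROC_I(inode)->pid; the rest is stock seq_file plumbing.
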
diff --git a/fs/proc/fd.h b/fs/proc/fd.h
index cbb1d47deda8..7c047f256ae2 100644
--- a/fs/proc/fd.h
+++ b/fs/proc/fd.h
@@ -11,4 +11,9 @@ extern const struct inode_operations proc_fdinfo_inode_operations;
11 11
12extern int proc_fd_permission(struct inode *inode, int mask); 12extern int proc_fd_permission(struct inode *inode, int mask);
13 13
14static inline int proc_fd(struct inode *inode)
15{
16 return PROC_I(inode)->fd;
17}
18
14#endif /* __PROCFS_FD_H__ */ 19#endif /* __PROCFS_FD_H__ */
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 21e1a8f1659d..a2596afffae6 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -36,212 +36,6 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry
36 return !memcmp(name, de->name, len); 36 return !memcmp(name, de->name, len);
37} 37}
38 38
39/* buffer size is one page but our output routines use some slack for overruns */
40#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
41
42static ssize_t
43__proc_file_read(struct file *file, char __user *buf, size_t nbytes,
44 loff_t *ppos)
45{
46 struct inode * inode = file_inode(file);
47 char *page;
48 ssize_t retval=0;
49 int eof=0;
50 ssize_t n, count;
51 char *start;
52 struct proc_dir_entry * dp;
53 unsigned long long pos;
54
55 /*
56 * Gaah, please just use "seq_file" instead. The legacy /proc
57 * interfaces cut loff_t down to off_t for reads, and ignore
58 * the offset entirely for writes..
59 */
60 pos = *ppos;
61 if (pos > MAX_NON_LFS)
62 return 0;
63 if (nbytes > MAX_NON_LFS - pos)
64 nbytes = MAX_NON_LFS - pos;
65
66 dp = PDE(inode);
67 if (!(page = (char*) __get_free_page(GFP_TEMPORARY)))
68 return -ENOMEM;
69
70 while ((nbytes > 0) && !eof) {
71 count = min_t(size_t, PROC_BLOCK_SIZE, nbytes);
72
73 start = NULL;
74 if (dp->read_proc) {
75 /*
76 * How to be a proc read function
77 * ------------------------------
78 * Prototype:
79 * int f(char *buffer, char **start, off_t offset,
80 * int count, int *peof, void *dat)
81 *
82 * Assume that the buffer is "count" bytes in size.
83 *
84 * If you know you have supplied all the data you
85 * have, set *peof.
86 *
87 * You have three ways to return data:
88 * 0) Leave *start = NULL. (This is the default.)
89 * Put the data of the requested offset at that
90 * offset within the buffer. Return the number (n)
91 * of bytes there are from the beginning of the
92 * buffer up to the last byte of data. If the
93 * number of supplied bytes (= n - offset) is
94 * greater than zero and you didn't signal eof
95 * and the reader is prepared to take more data
96 * you will be called again with the requested
97 * offset advanced by the number of bytes
98 * absorbed. This interface is useful for files
99 * no larger than the buffer.
100 * 1) Set *start = an unsigned long value less than
101 * the buffer address but greater than zero.
102 * Put the data of the requested offset at the
103 * beginning of the buffer. Return the number of
104 * bytes of data placed there. If this number is
105 * greater than zero and you didn't signal eof
106 * and the reader is prepared to take more data
107 * you will be called again with the requested
108 * offset advanced by *start. This interface is
109 * useful when you have a large file consisting
110 * of a series of blocks which you want to count
111 * and return as wholes.
112 * (Hack by Paul.Russell@rustcorp.com.au)
113 * 2) Set *start = an address within the buffer.
114 * Put the data of the requested offset at *start.
115 * Return the number of bytes of data placed there.
116 * If this number is greater than zero and you
117 * didn't signal eof and the reader is prepared to
118 * take more data you will be called again with the
119 * requested offset advanced by the number of bytes
120 * absorbed.
121 */
122 n = dp->read_proc(page, &start, *ppos,
123 count, &eof, dp->data);
124 } else
125 break;
126
127 if (n == 0) /* end of file */
128 break;
129 if (n < 0) { /* error */
130 if (retval == 0)
131 retval = n;
132 break;
133 }
134
135 if (start == NULL) {
136 if (n > PAGE_SIZE) /* Apparent buffer overflow */
137 n = PAGE_SIZE;
138 n -= *ppos;
139 if (n <= 0)
140 break;
141 if (n > count)
142 n = count;
143 start = page + *ppos;
144 } else if (start < page) {
145 if (n > PAGE_SIZE) /* Apparent buffer overflow */
146 n = PAGE_SIZE;
147 if (n > count) {
148 /*
149 * Don't reduce n because doing so might
150 * cut off part of a data block.
151 */
152 pr_warn("proc_file_read: count exceeded\n");
153 }
154 } else /* start >= page */ {
155 unsigned long startoff = (unsigned long)(start - page);
156 if (n > (PAGE_SIZE - startoff)) /* buffer overflow? */
157 n = PAGE_SIZE - startoff;
158 if (n > count)
159 n = count;
160 }
161
162 n -= copy_to_user(buf, start < page ? page : start, n);
163 if (n == 0) {
164 if (retval == 0)
165 retval = -EFAULT;
166 break;
167 }
168
169 *ppos += start < page ? (unsigned long)start : n;
170 nbytes -= n;
171 buf += n;
172 retval += n;
173 }
174 free_page((unsigned long) page);
175 return retval;
176}
177
178static ssize_t
179proc_file_read(struct file *file, char __user *buf, size_t nbytes,
180 loff_t *ppos)
181{
182 struct proc_dir_entry *pde = PDE(file_inode(file));
183 ssize_t rv = -EIO;
184
185 spin_lock(&pde->pde_unload_lock);
186 if (!pde->proc_fops) {
187 spin_unlock(&pde->pde_unload_lock);
188 return rv;
189 }
190 pde->pde_users++;
191 spin_unlock(&pde->pde_unload_lock);
192
193 rv = __proc_file_read(file, buf, nbytes, ppos);
194
195 pde_users_dec(pde);
196 return rv;
197}
198
199static ssize_t
200proc_file_write(struct file *file, const char __user *buffer,
201 size_t count, loff_t *ppos)
202{
203 struct proc_dir_entry *pde = PDE(file_inode(file));
204 ssize_t rv = -EIO;
205
206 if (pde->write_proc) {
207 spin_lock(&pde->pde_unload_lock);
208 if (!pde->proc_fops) {
209 spin_unlock(&pde->pde_unload_lock);
210 return rv;
211 }
212 pde->pde_users++;
213 spin_unlock(&pde->pde_unload_lock);
214
215 /* FIXME: does this routine need ppos? probably... */
216 rv = pde->write_proc(file, buffer, count, pde->data);
217 pde_users_dec(pde);
218 }
219 return rv;
220}
221
222
223static loff_t
224proc_file_lseek(struct file *file, loff_t offset, int orig)
225{
226 loff_t retval = -EINVAL;
227 switch (orig) {
228 case 1:
229 offset += file->f_pos;
230 /* fallthrough */
231 case 0:
232 if (offset < 0 || offset > MAX_NON_LFS)
233 break;
234 file->f_pos = retval = offset;
235 }
236 return retval;
237}
238
239static const struct file_operations proc_file_operations = {
240 .llseek = proc_file_lseek,
241 .read = proc_file_read,
242 .write = proc_file_write,
243};
244
245static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) 39static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
246{ 40{
247 struct inode *inode = dentry->d_inode; 41 struct inode *inode = dentry->d_inode;
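
The hunk above deletes __proc_file_read() together with the long in-code description of the legacy read_proc() calling convention. For orientation only, a minimal handler following the first convention described there ("0) Leave *start = NULL", output small enough for the one-page buffer) looked roughly like the sketch below; handlers of this shape are what the series replaces with seq_file:

/* Legacy interface -- exactly what the hunk above removes support for. */
static int foo_read_proc(char *page, char **start, off_t off,
                         int count, int *eof, void *data)
{
        int len = snprintf(page, PAGE_SIZE, "value: %d\n", *(int *)data);

        *eof = 1;       /* everything was supplied in one go */
        return len;     /* bytes counted from the start of the buffer */
}
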
@@ -371,7 +165,7 @@ void proc_free_inum(unsigned int inum)
371 165
372static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) 166static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
373{ 167{
374 nd_set_link(nd, PDE(dentry->d_inode)->data); 168 nd_set_link(nd, __PDE_DATA(dentry->d_inode));
375 return NULL; 169 return NULL;
376} 170}
377 171
@@ -541,19 +335,17 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
541 return ret; 335 return ret;
542 336
543 if (S_ISDIR(dp->mode)) { 337 if (S_ISDIR(dp->mode)) {
544 if (dp->proc_iops == NULL) { 338 dp->proc_fops = &proc_dir_operations;
545 dp->proc_fops = &proc_dir_operations; 339 dp->proc_iops = &proc_dir_inode_operations;
546 dp->proc_iops = &proc_dir_inode_operations;
547 }
548 dir->nlink++; 340 dir->nlink++;
549 } else if (S_ISLNK(dp->mode)) { 341 } else if (S_ISLNK(dp->mode)) {
550 if (dp->proc_iops == NULL) 342 dp->proc_iops = &proc_link_inode_operations;
551 dp->proc_iops = &proc_link_inode_operations;
552 } else if (S_ISREG(dp->mode)) { 343 } else if (S_ISREG(dp->mode)) {
553 if (dp->proc_fops == NULL) 344 BUG_ON(dp->proc_fops == NULL);
554 dp->proc_fops = &proc_file_operations; 345 dp->proc_iops = &proc_file_inode_operations;
555 if (dp->proc_iops == NULL) 346 } else {
556 dp->proc_iops = &proc_file_inode_operations; 347 WARN_ON(1);
348 return -EINVAL;
557 } 349 }
558 350
559 spin_lock(&proc_subdir_lock); 351 spin_lock(&proc_subdir_lock);
@@ -636,13 +428,17 @@ struct proc_dir_entry *proc_symlink(const char *name,
636} 428}
637EXPORT_SYMBOL(proc_symlink); 429EXPORT_SYMBOL(proc_symlink);
638 430
639struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, 431struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode,
640 struct proc_dir_entry *parent) 432 struct proc_dir_entry *parent, void *data)
641{ 433{
642 struct proc_dir_entry *ent; 434 struct proc_dir_entry *ent;
643 435
436 if (mode == 0)
437 mode = S_IRUGO | S_IXUGO;
438
644 ent = __proc_create(&parent, name, S_IFDIR | mode, 2); 439 ent = __proc_create(&parent, name, S_IFDIR | mode, 2);
645 if (ent) { 440 if (ent) {
441 ent->data = data;
646 if (proc_register(parent, ent) < 0) { 442 if (proc_register(parent, ent) < 0) {
647 kfree(ent); 443 kfree(ent);
648 ent = NULL; 444 ent = NULL;
@@ -650,82 +446,39 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
650 } 446 }
651 return ent; 447 return ent;
652} 448}
653EXPORT_SYMBOL(proc_mkdir_mode); 449EXPORT_SYMBOL_GPL(proc_mkdir_data);
654 450
655struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, 451struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode,
656 struct proc_dir_entry *parent) 452 struct proc_dir_entry *parent)
657{ 453{
658 struct proc_dir_entry *ent; 454 return proc_mkdir_data(name, mode, parent, NULL);
659
660 ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2);
661 if (ent) {
662 ent->data = net;
663 if (proc_register(parent, ent) < 0) {
664 kfree(ent);
665 ent = NULL;
666 }
667 }
668 return ent;
669} 455}
670EXPORT_SYMBOL_GPL(proc_net_mkdir); 456EXPORT_SYMBOL(proc_mkdir_mode);
671 457
672struct proc_dir_entry *proc_mkdir(const char *name, 458struct proc_dir_entry *proc_mkdir(const char *name,
673 struct proc_dir_entry *parent) 459 struct proc_dir_entry *parent)
674{ 460{
675 return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); 461 return proc_mkdir_data(name, 0, parent, NULL);
676} 462}
677EXPORT_SYMBOL(proc_mkdir); 463EXPORT_SYMBOL(proc_mkdir);
678 464
679struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode,
680 struct proc_dir_entry *parent)
681{
682 struct proc_dir_entry *ent;
683 nlink_t nlink;
684
685 if (S_ISDIR(mode)) {
686 if ((mode & S_IALLUGO) == 0)
687 mode |= S_IRUGO | S_IXUGO;
688 nlink = 2;
689 } else {
690 if ((mode & S_IFMT) == 0)
691 mode |= S_IFREG;
692 if ((mode & S_IALLUGO) == 0)
693 mode |= S_IRUGO;
694 nlink = 1;
695 }
696
697 ent = __proc_create(&parent, name, mode, nlink);
698 if (ent) {
699 if (proc_register(parent, ent) < 0) {
700 kfree(ent);
701 ent = NULL;
702 }
703 }
704 return ent;
705}
706EXPORT_SYMBOL(create_proc_entry);
707
708struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, 465struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
709 struct proc_dir_entry *parent, 466 struct proc_dir_entry *parent,
710 const struct file_operations *proc_fops, 467 const struct file_operations *proc_fops,
711 void *data) 468 void *data)
712{ 469{
713 struct proc_dir_entry *pde; 470 struct proc_dir_entry *pde;
714 nlink_t nlink; 471 if ((mode & S_IFMT) == 0)
472 mode |= S_IFREG;
715 473
716 if (S_ISDIR(mode)) { 474 if (!S_ISREG(mode)) {
717 if ((mode & S_IALLUGO) == 0) 475 WARN_ON(1); /* use proc_mkdir() */
718 mode |= S_IRUGO | S_IXUGO; 476 return NULL;
719 nlink = 2;
720 } else {
721 if ((mode & S_IFMT) == 0)
722 mode |= S_IFREG;
723 if ((mode & S_IALLUGO) == 0)
724 mode |= S_IRUGO;
725 nlink = 1;
726 } 477 }
727 478
728 pde = __proc_create(&parent, name, mode, nlink); 479 if ((mode & S_IALLUGO) == 0)
480 mode |= S_IRUGO;
481 pde = __proc_create(&parent, name, mode, 1);
729 if (!pde) 482 if (!pde)
730 goto out; 483 goto out;
731 pde->proc_fops = proc_fops; 484 pde->proc_fops = proc_fops;
@@ -739,6 +492,19 @@ out:
739 return NULL; 492 return NULL;
740} 493}
741EXPORT_SYMBOL(proc_create_data); 494EXPORT_SYMBOL(proc_create_data);
495
496void proc_set_size(struct proc_dir_entry *de, loff_t size)
497{
498 de->size = size;
499}
500EXPORT_SYMBOL(proc_set_size);
501
502void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid)
503{
504 de->uid = uid;
505 de->gid = gid;
506}
507EXPORT_SYMBOL(proc_set_user);
742 508
743static void free_proc_entry(struct proc_dir_entry *de) 509static void free_proc_entry(struct proc_dir_entry *de)
744{ 510{
@@ -755,41 +521,6 @@ void pde_put(struct proc_dir_entry *pde)
755 free_proc_entry(pde); 521 free_proc_entry(pde);
756} 522}
757 523
758static void entry_rundown(struct proc_dir_entry *de)
759{
760 spin_lock(&de->pde_unload_lock);
761 /*
762 * Stop accepting new callers into module. If you're
763 * dynamically allocating ->proc_fops, save a pointer somewhere.
764 */
765 de->proc_fops = NULL;
766 /* Wait until all existing callers into module are done. */
767 if (de->pde_users > 0) {
768 DECLARE_COMPLETION_ONSTACK(c);
769
770 if (!de->pde_unload_completion)
771 de->pde_unload_completion = &c;
772
773 spin_unlock(&de->pde_unload_lock);
774
775 wait_for_completion(de->pde_unload_completion);
776
777 spin_lock(&de->pde_unload_lock);
778 }
779
780 while (!list_empty(&de->pde_openers)) {
781 struct pde_opener *pdeo;
782
783 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
784 list_del(&pdeo->lh);
785 spin_unlock(&de->pde_unload_lock);
786 pdeo->release(pdeo->inode, pdeo->file);
787 kfree(pdeo);
788 spin_lock(&de->pde_unload_lock);
789 }
790 spin_unlock(&de->pde_unload_lock);
791}
792
793/* 524/*
794 * Remove a /proc entry and free it if it's not currently in use. 525 * Remove a /proc entry and free it if it's not currently in use.
795 */ 526 */
@@ -821,7 +552,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
821 return; 552 return;
822 } 553 }
823 554
824 entry_rundown(de); 555 proc_entry_rundown(de);
825 556
826 if (S_ISDIR(de->mode)) 557 if (S_ISDIR(de->mode))
827 parent->nlink--; 558 parent->nlink--;
@@ -870,7 +601,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
870 } 601 }
871 spin_unlock(&proc_subdir_lock); 602 spin_unlock(&proc_subdir_lock);
872 603
873 entry_rundown(de); 604 proc_entry_rundown(de);
874 next = de->parent; 605 next = de->parent;
875 if (S_ISDIR(de->mode)) 606 if (S_ISDIR(de->mode))
876 next->nlink--; 607 next->nlink--;
@@ -886,3 +617,23 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
886 return 0; 617 return 0;
887} 618}
888EXPORT_SYMBOL(remove_proc_subtree); 619EXPORT_SYMBOL(remove_proc_subtree);
620
621void *proc_get_parent_data(const struct inode *inode)
622{
623 struct proc_dir_entry *de = PDE(inode);
624 return de->parent->data;
625}
626EXPORT_SYMBOL_GPL(proc_get_parent_data);
627
628void proc_remove(struct proc_dir_entry *de)
629{
630 if (de)
631 remove_proc_subtree(de->name, de->parent);
632}
633EXPORT_SYMBOL(proc_remove);
634
635void *PDE_DATA(const struct inode *inode)
636{
637 return __PDE_DATA(inode);
638}
639EXPORT_SYMBOL(PDE_DATA);
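
With create_proc_entry() and the default proc_file_operations gone, a regular /proc entry can only be registered with explicit file_operations (note the BUG_ON() in proc_register() above), and its private pointer is reached through the new PDE_DATA() export rather than by dereferencing the PDE. A sketch of the resulting idiom, using the proc_mkdir_data(), proc_create_data() and remove_proc_subtree() helpers from this file; the foo names and foo_state structure are illustrative:

struct foo_state {
        unsigned long hits;
};
static struct foo_state foo_state;

static int foo_status_show(struct seq_file *m, void *v)
{
        struct foo_state *st = m->private;

        seq_printf(m, "hits: %lu\n", st->hits);
        return 0;
}

static int foo_status_open(struct inode *inode, struct file *file)
{
        /* PDE_DATA() returns the pointer handed to proc_create_data() */
        return single_open(file, foo_status_show, PDE_DATA(inode));
}

static const struct file_operations foo_status_fops = {
        .open           = foo_status_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static int __init foo_proc_init(void)
{
        struct proc_dir_entry *dir;

        /* mode 0 means the default S_IRUGO | S_IXUGO, per the hunk above */
        dir = proc_mkdir_data("foo", 0, NULL, &foo_state);
        if (!dir)
                return -ENOMEM;
        if (!proc_create_data("status", 0444, dir, &foo_status_fops,
                              &foo_state)) {
                remove_proc_subtree("foo", NULL);
                return -ENOMEM;
        }
        return 0;
}

Teardown is remove_proc_subtree("foo", NULL), or equivalently proc_remove(dir), which the hunk above defines as a wrapper around remove_proc_subtree().
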
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 869116c2afbe..073aea60cf8f 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -22,6 +22,7 @@
22#include <linux/seq_file.h> 22#include <linux/seq_file.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/magic.h>
25 26
26#include <asm/uaccess.h> 27#include <asm/uaccess.h>
27 28
@@ -50,8 +51,8 @@ static void proc_evict_inode(struct inode *inode)
50 sysctl_head_put(head); 51 sysctl_head_put(head);
51 } 52 }
52 /* Release any associated namespace */ 53 /* Release any associated namespace */
53 ns_ops = PROC_I(inode)->ns_ops; 54 ns_ops = PROC_I(inode)->ns.ns_ops;
54 ns = PROC_I(inode)->ns; 55 ns = PROC_I(inode)->ns.ns;
55 if (ns_ops && ns) 56 if (ns_ops && ns)
56 ns_ops->put(ns); 57 ns_ops->put(ns);
57} 58}
@@ -72,8 +73,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
72 ei->pde = NULL; 73 ei->pde = NULL;
73 ei->sysctl = NULL; 74 ei->sysctl = NULL;
74 ei->sysctl_entry = NULL; 75 ei->sysctl_entry = NULL;
75 ei->ns = NULL; 76 ei->ns.ns = NULL;
76 ei->ns_ops = NULL; 77 ei->ns.ns_ops = NULL;
77 inode = &ei->vfs_inode; 78 inode = &ei->vfs_inode;
78 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 79 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
79 return inode; 80 return inode;
@@ -129,96 +130,100 @@ static const struct super_operations proc_sops = {
129 .show_options = proc_show_options, 130 .show_options = proc_show_options,
130}; 131};
131 132
132static void __pde_users_dec(struct proc_dir_entry *pde) 133enum {BIAS = -1U<<31};
134
135static inline int use_pde(struct proc_dir_entry *pde)
136{
137 return atomic_inc_unless_negative(&pde->in_use);
138}
139
140static void unuse_pde(struct proc_dir_entry *pde)
133{ 141{
134 pde->pde_users--; 142 if (atomic_dec_return(&pde->in_use) == BIAS)
135 if (pde->pde_unload_completion && pde->pde_users == 0)
136 complete(pde->pde_unload_completion); 143 complete(pde->pde_unload_completion);
137} 144}
138 145
139void pde_users_dec(struct proc_dir_entry *pde) 146/* pde is locked */
147static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo)
140{ 148{
141 spin_lock(&pde->pde_unload_lock); 149 if (pdeo->closing) {
142 __pde_users_dec(pde); 150 /* somebody else is doing that, just wait */
143 spin_unlock(&pde->pde_unload_lock); 151 DECLARE_COMPLETION_ONSTACK(c);
152 pdeo->c = &c;
153 spin_unlock(&pde->pde_unload_lock);
154 wait_for_completion(&c);
155 spin_lock(&pde->pde_unload_lock);
156 } else {
157 struct file *file;
158 pdeo->closing = 1;
159 spin_unlock(&pde->pde_unload_lock);
160 file = pdeo->file;
161 pde->proc_fops->release(file_inode(file), file);
162 spin_lock(&pde->pde_unload_lock);
163 list_del_init(&pdeo->lh);
164 if (pdeo->c)
165 complete(pdeo->c);
166 kfree(pdeo);
167 }
168}
169
170void proc_entry_rundown(struct proc_dir_entry *de)
171{
172 DECLARE_COMPLETION_ONSTACK(c);
173 /* Wait until all existing callers into module are done. */
174 de->pde_unload_completion = &c;
175 if (atomic_add_return(BIAS, &de->in_use) != BIAS)
176 wait_for_completion(&c);
177
178 spin_lock(&de->pde_unload_lock);
179 while (!list_empty(&de->pde_openers)) {
180 struct pde_opener *pdeo;
181 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
182 close_pdeo(de, pdeo);
183 }
184 spin_unlock(&de->pde_unload_lock);
144} 185}
145 186
146static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) 187static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
147{ 188{
148 struct proc_dir_entry *pde = PDE(file_inode(file)); 189 struct proc_dir_entry *pde = PDE(file_inode(file));
149 loff_t rv = -EINVAL; 190 loff_t rv = -EINVAL;
150 loff_t (*llseek)(struct file *, loff_t, int); 191 if (use_pde(pde)) {
151 192 loff_t (*llseek)(struct file *, loff_t, int);
152 spin_lock(&pde->pde_unload_lock); 193 llseek = pde->proc_fops->llseek;
153 /* 194 if (!llseek)
154 * remove_proc_entry() is going to delete PDE (as part of module 195 llseek = default_llseek;
155 * cleanup sequence). No new callers into module allowed. 196 rv = llseek(file, offset, whence);
156 */ 197 unuse_pde(pde);
157 if (!pde->proc_fops) {
158 spin_unlock(&pde->pde_unload_lock);
159 return rv;
160 } 198 }
161 /*
 162	 * Bump refcount so that remove_proc_entry will wait for ->llseek to
163 * complete.
164 */
165 pde->pde_users++;
166 /*
167 * Save function pointer under lock, to protect against ->proc_fops
168 * NULL'ifying right after ->pde_unload_lock is dropped.
169 */
170 llseek = pde->proc_fops->llseek;
171 spin_unlock(&pde->pde_unload_lock);
172
173 if (!llseek)
174 llseek = default_llseek;
175 rv = llseek(file, offset, whence);
176
177 pde_users_dec(pde);
178 return rv; 199 return rv;
179} 200}
180 201
181static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) 202static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
182{ 203{
204 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
183 struct proc_dir_entry *pde = PDE(file_inode(file)); 205 struct proc_dir_entry *pde = PDE(file_inode(file));
184 ssize_t rv = -EIO; 206 ssize_t rv = -EIO;
185 ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); 207 if (use_pde(pde)) {
186 208 read = pde->proc_fops->read;
187 spin_lock(&pde->pde_unload_lock); 209 if (read)
188 if (!pde->proc_fops) { 210 rv = read(file, buf, count, ppos);
189 spin_unlock(&pde->pde_unload_lock); 211 unuse_pde(pde);
190 return rv;
191 } 212 }
192 pde->pde_users++;
193 read = pde->proc_fops->read;
194 spin_unlock(&pde->pde_unload_lock);
195
196 if (read)
197 rv = read(file, buf, count, ppos);
198
199 pde_users_dec(pde);
200 return rv; 213 return rv;
201} 214}
202 215
203static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) 216static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
204{ 217{
218 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
205 struct proc_dir_entry *pde = PDE(file_inode(file)); 219 struct proc_dir_entry *pde = PDE(file_inode(file));
206 ssize_t rv = -EIO; 220 ssize_t rv = -EIO;
207 ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); 221 if (use_pde(pde)) {
208 222 write = pde->proc_fops->write;
209 spin_lock(&pde->pde_unload_lock); 223 if (write)
210 if (!pde->proc_fops) { 224 rv = write(file, buf, count, ppos);
211 spin_unlock(&pde->pde_unload_lock); 225 unuse_pde(pde);
212 return rv;
213 } 226 }
214 pde->pde_users++;
215 write = pde->proc_fops->write;
216 spin_unlock(&pde->pde_unload_lock);
217
218 if (write)
219 rv = write(file, buf, count, ppos);
220
221 pde_users_dec(pde);
222 return rv; 227 return rv;
223} 228}
224 229
@@ -227,20 +232,12 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p
227 struct proc_dir_entry *pde = PDE(file_inode(file)); 232 struct proc_dir_entry *pde = PDE(file_inode(file));
228 unsigned int rv = DEFAULT_POLLMASK; 233 unsigned int rv = DEFAULT_POLLMASK;
229 unsigned int (*poll)(struct file *, struct poll_table_struct *); 234 unsigned int (*poll)(struct file *, struct poll_table_struct *);
230 235 if (use_pde(pde)) {
231 spin_lock(&pde->pde_unload_lock); 236 poll = pde->proc_fops->poll;
232 if (!pde->proc_fops) { 237 if (poll)
233 spin_unlock(&pde->pde_unload_lock); 238 rv = poll(file, pts);
234 return rv; 239 unuse_pde(pde);
235 } 240 }
236 pde->pde_users++;
237 poll = pde->proc_fops->poll;
238 spin_unlock(&pde->pde_unload_lock);
239
240 if (poll)
241 rv = poll(file, pts);
242
243 pde_users_dec(pde);
244 return rv; 241 return rv;
245} 242}
246 243
@@ -249,20 +246,12 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
249 struct proc_dir_entry *pde = PDE(file_inode(file)); 246 struct proc_dir_entry *pde = PDE(file_inode(file));
250 long rv = -ENOTTY; 247 long rv = -ENOTTY;
251 long (*ioctl)(struct file *, unsigned int, unsigned long); 248 long (*ioctl)(struct file *, unsigned int, unsigned long);
252 249 if (use_pde(pde)) {
253 spin_lock(&pde->pde_unload_lock); 250 ioctl = pde->proc_fops->unlocked_ioctl;
254 if (!pde->proc_fops) { 251 if (ioctl)
255 spin_unlock(&pde->pde_unload_lock); 252 rv = ioctl(file, cmd, arg);
256 return rv; 253 unuse_pde(pde);
257 } 254 }
258 pde->pde_users++;
259 ioctl = pde->proc_fops->unlocked_ioctl;
260 spin_unlock(&pde->pde_unload_lock);
261
262 if (ioctl)
263 rv = ioctl(file, cmd, arg);
264
265 pde_users_dec(pde);
266 return rv; 255 return rv;
267} 256}
268 257
@@ -272,20 +261,12 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned
272 struct proc_dir_entry *pde = PDE(file_inode(file)); 261 struct proc_dir_entry *pde = PDE(file_inode(file));
273 long rv = -ENOTTY; 262 long rv = -ENOTTY;
274 long (*compat_ioctl)(struct file *, unsigned int, unsigned long); 263 long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
275 264 if (use_pde(pde)) {
276 spin_lock(&pde->pde_unload_lock); 265 compat_ioctl = pde->proc_fops->compat_ioctl;
277 if (!pde->proc_fops) { 266 if (compat_ioctl)
278 spin_unlock(&pde->pde_unload_lock); 267 rv = compat_ioctl(file, cmd, arg);
279 return rv; 268 unuse_pde(pde);
280 } 269 }
281 pde->pde_users++;
282 compat_ioctl = pde->proc_fops->compat_ioctl;
283 spin_unlock(&pde->pde_unload_lock);
284
285 if (compat_ioctl)
286 rv = compat_ioctl(file, cmd, arg);
287
288 pde_users_dec(pde);
289 return rv; 270 return rv;
290} 271}
291#endif 272#endif
@@ -295,20 +276,12 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
295 struct proc_dir_entry *pde = PDE(file_inode(file)); 276 struct proc_dir_entry *pde = PDE(file_inode(file));
296 int rv = -EIO; 277 int rv = -EIO;
297 int (*mmap)(struct file *, struct vm_area_struct *); 278 int (*mmap)(struct file *, struct vm_area_struct *);
298 279 if (use_pde(pde)) {
299 spin_lock(&pde->pde_unload_lock); 280 mmap = pde->proc_fops->mmap;
300 if (!pde->proc_fops) { 281 if (mmap)
301 spin_unlock(&pde->pde_unload_lock); 282 rv = mmap(file, vma);
302 return rv; 283 unuse_pde(pde);
303 } 284 }
304 pde->pde_users++;
305 mmap = pde->proc_fops->mmap;
306 spin_unlock(&pde->pde_unload_lock);
307
308 if (mmap)
309 rv = mmap(file, vma);
310
311 pde_users_dec(pde);
312 return rv; 285 return rv;
313} 286}
314 287
@@ -330,91 +303,47 @@ static int proc_reg_open(struct inode *inode, struct file *file)
330 * by hand in remove_proc_entry(). For this, save opener's credentials 303 * by hand in remove_proc_entry(). For this, save opener's credentials
331 * for later. 304 * for later.
332 */ 305 */
333 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); 306 pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL);
334 if (!pdeo) 307 if (!pdeo)
335 return -ENOMEM; 308 return -ENOMEM;
336 309
337 spin_lock(&pde->pde_unload_lock); 310 if (!use_pde(pde)) {
338 if (!pde->proc_fops) {
339 spin_unlock(&pde->pde_unload_lock);
340 kfree(pdeo); 311 kfree(pdeo);
341 return -ENOENT; 312 return -ENOENT;
342 } 313 }
343 pde->pde_users++;
344 open = pde->proc_fops->open; 314 open = pde->proc_fops->open;
345 release = pde->proc_fops->release; 315 release = pde->proc_fops->release;
346 spin_unlock(&pde->pde_unload_lock);
347 316
348 if (open) 317 if (open)
349 rv = open(inode, file); 318 rv = open(inode, file);
350 319
351 spin_lock(&pde->pde_unload_lock);
352 if (rv == 0 && release) { 320 if (rv == 0 && release) {
353 /* To know what to release. */ 321 /* To know what to release. */
354 pdeo->inode = inode;
355 pdeo->file = file; 322 pdeo->file = file;
356 /* Strictly for "too late" ->release in proc_reg_release(). */ 323 /* Strictly for "too late" ->release in proc_reg_release(). */
357 pdeo->release = release; 324 spin_lock(&pde->pde_unload_lock);
358 list_add(&pdeo->lh, &pde->pde_openers); 325 list_add(&pdeo->lh, &pde->pde_openers);
326 spin_unlock(&pde->pde_unload_lock);
359 } else 327 } else
360 kfree(pdeo); 328 kfree(pdeo);
361 __pde_users_dec(pde);
362 spin_unlock(&pde->pde_unload_lock);
363 return rv;
364}
365
366static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
367 struct inode *inode, struct file *file)
368{
369 struct pde_opener *pdeo;
370 329
371 list_for_each_entry(pdeo, &pde->pde_openers, lh) { 330 unuse_pde(pde);
372 if (pdeo->inode == inode && pdeo->file == file) 331 return rv;
373 return pdeo;
374 }
375 return NULL;
376} 332}
377 333
378static int proc_reg_release(struct inode *inode, struct file *file) 334static int proc_reg_release(struct inode *inode, struct file *file)
379{ 335{
380 struct proc_dir_entry *pde = PDE(inode); 336 struct proc_dir_entry *pde = PDE(inode);
381 int rv = 0;
382 int (*release)(struct inode *, struct file *);
383 struct pde_opener *pdeo; 337 struct pde_opener *pdeo;
384
385 spin_lock(&pde->pde_unload_lock); 338 spin_lock(&pde->pde_unload_lock);
386 pdeo = find_pde_opener(pde, inode, file); 339 list_for_each_entry(pdeo, &pde->pde_openers, lh) {
387 if (!pde->proc_fops) { 340 if (pdeo->file == file) {
388 /* 341 close_pdeo(pde, pdeo);
389 * Can't simply exit, __fput() will think that everything is OK, 342 break;
390 * and move on to freeing struct file. remove_proc_entry() will 343 }
391 * find slacker in opener's list and will try to do non-trivial
392 * things with struct file. Therefore, remove opener from list.
393 *
394 * But if opener is removed from list, who will ->release it?
395 */
396 if (pdeo) {
397 list_del(&pdeo->lh);
398 spin_unlock(&pde->pde_unload_lock);
399 rv = pdeo->release(inode, file);
400 kfree(pdeo);
401 } else
402 spin_unlock(&pde->pde_unload_lock);
403 return rv;
404 }
405 pde->pde_users++;
406 release = pde->proc_fops->release;
407 if (pdeo) {
408 list_del(&pdeo->lh);
409 kfree(pdeo);
410 } 344 }
411 spin_unlock(&pde->pde_unload_lock); 345 spin_unlock(&pde->pde_unload_lock);
412 346 return 0;
413 if (release)
414 rv = release(inode, file);
415
416 pde_users_dec(pde);
417 return rv;
418} 347}
419 348
420static const struct file_operations proc_reg_file_ops = { 349static const struct file_operations proc_reg_file_ops = {
@@ -462,8 +391,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
462 inode->i_size = de->size; 391 inode->i_size = de->size;
463 if (de->nlink) 392 if (de->nlink)
464 set_nlink(inode, de->nlink); 393 set_nlink(inode, de->nlink);
465 if (de->proc_iops) 394 WARN_ON(!de->proc_iops);
466 inode->i_op = de->proc_iops; 395 inode->i_op = de->proc_iops;
467 if (de->proc_fops) { 396 if (de->proc_fops) {
468 if (S_ISREG(inode->i_mode)) { 397 if (S_ISREG(inode->i_mode)) {
469#ifdef CONFIG_COMPAT 398#ifdef CONFIG_COMPAT
@@ -506,5 +435,5 @@ int proc_fill_super(struct super_block *s)
506 return -ENOMEM; 435 return -ENOMEM;
507 } 436 }
508 437
509 return 0; 438 return proc_setup_self(s);
510} 439}
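
Every proc_reg_*() wrapper above now reduces to the same use_pde()/unuse_pde() bracket: ->in_use counts callers currently inside the entry, proc_entry_rundown() adds BIAS to drive it negative so atomic_inc_unless_negative() turns away new callers, and the last caller to leave completes the rundown waiter. The idiom in isolation, as a sketch with illustrative gate names:

#include <linux/atomic.h>
#include <linux/completion.h>
#include <linux/kernel.h>               /* INT_MIN */

#define GATE_BIAS INT_MIN               /* same value as the -1U<<31 above */

struct gate {
        atomic_t                in_use;         /* callers currently inside */
        struct completion       *rundown;
};

static bool gate_enter(struct gate *g)
{
        /* fails once gate_rundown() has flipped the counter negative */
        return atomic_inc_unless_negative(&g->in_use);
}

static void gate_leave(struct gate *g)
{
        /* the last caller out after rundown lands exactly on the bias */
        if (atomic_dec_return(&g->in_use) == GATE_BIAS)
                complete(g->rundown);
}

static void gate_rundown(struct gate *g)
{
        DECLARE_COMPLETION_ONSTACK(c);

        g->rundown = &c;
        /* anything other than the bare bias means callers are still inside */
        if (atomic_add_return(GATE_BIAS, &g->in_use) != GATE_BIAS)
                wait_for_completion(&c);
}
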
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 75710357a517..d600fb098b6a 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -1,4 +1,4 @@
1/* internal.h: internal procfs definitions 1/* Internal procfs definitions
2 * 2 *
3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. 3 * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com) 4 * Written by David Howells (dhowells@redhat.com)
@@ -9,62 +9,83 @@
9 * 2 of the License, or (at your option) any later version. 9 * 2 of the License, or (at your option) any later version.
10 */ 10 */
11 11
12#include <linux/sched.h>
13#include <linux/proc_fs.h> 12#include <linux/proc_fs.h>
13#include <linux/proc_ns.h>
14#include <linux/spinlock.h>
15#include <linux/atomic.h>
14#include <linux/binfmts.h> 16#include <linux/binfmts.h>
15struct ctl_table_header;
16struct mempolicy;
17 17
18extern struct proc_dir_entry proc_root; 18struct ctl_table_header;
19extern void proc_self_init(void); 19struct mempolicy;
20#ifdef CONFIG_PROC_SYSCTL
21extern int proc_sys_init(void);
22extern void sysctl_head_put(struct ctl_table_header *head);
23#else
24static inline void proc_sys_init(void) { }
25static inline void sysctl_head_put(struct ctl_table_header *head) { }
26#endif
27#ifdef CONFIG_NET
28extern int proc_net_init(void);
29#else
30static inline int proc_net_init(void) { return 0; }
31#endif
32 20
33extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, 21/*
34 struct pid *pid, struct task_struct *task); 22 * This is not completely implemented yet. The idea is to
35extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, 23 * create an in-memory tree (like the actual /proc filesystem
36 struct pid *pid, struct task_struct *task); 24 * tree) of these proc_dir_entries, so that we can dynamically
37extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, 25 * add new files to /proc.
38 struct pid *pid, struct task_struct *task); 26 *
39extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, 27 * The "next" pointer creates a linked list of one /proc directory,
40 struct pid *pid, struct task_struct *task); 28 * while parent/subdir create the directory structure (every
41extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); 29 * /proc file has a parent, but "subdir" is NULL for all
30 * non-directory entries).
31 */
32struct proc_dir_entry {
33 unsigned int low_ino;
34 umode_t mode;
35 nlink_t nlink;
36 kuid_t uid;
37 kgid_t gid;
38 loff_t size;
39 const struct inode_operations *proc_iops;
40 const struct file_operations *proc_fops;
41 struct proc_dir_entry *next, *parent, *subdir;
42 void *data;
43 atomic_t count; /* use count */
44 atomic_t in_use; /* number of callers into module in progress; */
45 /* negative -> it's going away RSN */
46 struct completion *pde_unload_completion;
47 struct list_head pde_openers; /* who did ->open, but not ->release */
48 spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
49 u8 namelen;
50 char name[];
51};
42 52
43extern const struct file_operations proc_tid_children_operations; 53union proc_op {
44extern const struct file_operations proc_pid_maps_operations; 54 int (*proc_get_link)(struct dentry *, struct path *);
45extern const struct file_operations proc_tid_maps_operations; 55 int (*proc_read)(struct task_struct *task, char *page);
46extern const struct file_operations proc_pid_numa_maps_operations; 56 int (*proc_show)(struct seq_file *m,
47extern const struct file_operations proc_tid_numa_maps_operations; 57 struct pid_namespace *ns, struct pid *pid,
48extern const struct file_operations proc_pid_smaps_operations; 58 struct task_struct *task);
49extern const struct file_operations proc_tid_smaps_operations; 59};
50extern const struct file_operations proc_clear_refs_operations;
51extern const struct file_operations proc_pagemap_operations;
52extern const struct file_operations proc_net_operations;
53extern const struct inode_operations proc_net_inode_operations;
54extern const struct inode_operations proc_pid_link_inode_operations;
55 60
56struct proc_maps_private { 61struct proc_inode {
57 struct pid *pid; 62 struct pid *pid;
58 struct task_struct *task; 63 int fd;
59#ifdef CONFIG_MMU 64 union proc_op op;
60 struct vm_area_struct *tail_vma; 65 struct proc_dir_entry *pde;
61#endif 66 struct ctl_table_header *sysctl;
62#ifdef CONFIG_NUMA 67 struct ctl_table *sysctl_entry;
63 struct mempolicy *task_mempolicy; 68 struct proc_ns ns;
64#endif 69 struct inode vfs_inode;
65}; 70};
66 71
67void proc_init_inodecache(void); 72/*
73 * General functions
74 */
75static inline struct proc_inode *PROC_I(const struct inode *inode)
76{
77 return container_of(inode, struct proc_inode, vfs_inode);
78}
79
80static inline struct proc_dir_entry *PDE(const struct inode *inode)
81{
82 return PROC_I(inode)->pde;
83}
84
85static inline void *__PDE_DATA(const struct inode *inode)
86{
87 return PDE(inode)->data;
88}
68 89
69static inline struct pid *proc_pid(struct inode *inode) 90static inline struct pid *proc_pid(struct inode *inode)
70{ 91{
@@ -76,11 +97,6 @@ static inline struct task_struct *get_proc_task(struct inode *inode)
76 return get_pid_task(proc_pid(inode), PIDTYPE_PID); 97 return get_pid_task(proc_pid(inode), PIDTYPE_PID);
77} 98}
78 99
79static inline int proc_fd(struct inode *inode)
80{
81 return PROC_I(inode)->fd;
82}
83
84static inline int task_dumpable(struct task_struct *task) 100static inline int task_dumpable(struct task_struct *task)
85{ 101{
86 int dumpable = 0; 102 int dumpable = 0;
@@ -96,15 +112,6 @@ static inline int task_dumpable(struct task_struct *task)
96 return 0; 112 return 0;
97} 113}
98 114
99static inline int pid_delete_dentry(const struct dentry * dentry)
100{
101 /* Is the task we represent dead?
102 * If so, then don't put the dentry on the lru list,
103 * kill it immediately.
104 */
105 return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
106}
107
108static inline unsigned name_to_int(struct dentry *dentry) 115static inline unsigned name_to_int(struct dentry *dentry)
109{ 116{
110 const char *name = dentry->d_name.name; 117 const char *name = dentry->d_name.name;
@@ -127,63 +134,165 @@ out:
127 return ~0U; 134 return ~0U;
128} 135}
129 136
130struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, 137/*
131 struct dentry *dentry); 138 * Offset of the first process in the /proc root directory..
132int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 139 */
133 filldir_t filldir); 140#define FIRST_PROCESS_ENTRY 256
141
142/* Worst case buffer size needed for holding an integer. */
143#define PROC_NUMBUF 13
134 144
135struct pde_opener { 145/*
136 struct inode *inode; 146 * array.c
137 struct file *file; 147 */
138 int (*release)(struct inode *, struct file *); 148extern const struct file_operations proc_tid_children_operations;
139 struct list_head lh;
140};
141void pde_users_dec(struct proc_dir_entry *pde);
142 149
150extern int proc_tid_stat(struct seq_file *, struct pid_namespace *,
151 struct pid *, struct task_struct *);
152extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *,
153 struct pid *, struct task_struct *);
154extern int proc_pid_status(struct seq_file *, struct pid_namespace *,
155 struct pid *, struct task_struct *);
156extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
157 struct pid *, struct task_struct *);
158
159/*
160 * base.c
161 */
162extern const struct dentry_operations pid_dentry_operations;
163extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *);
164extern int proc_setattr(struct dentry *, struct iattr *);
165extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *);
166extern int pid_revalidate(struct dentry *, unsigned int);
167extern int pid_delete_dentry(const struct dentry *);
168extern int proc_pid_readdir(struct file *, void *, filldir_t);
169extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int);
170extern loff_t mem_lseek(struct file *, loff_t, int);
171
172/* Lookups */
173typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
174 struct task_struct *, const void *);
175extern int proc_fill_cache(struct file *, void *, filldir_t, const char *, int,
176 instantiate_t, struct task_struct *, const void *);
177
178/*
179 * generic.c
180 */
143extern spinlock_t proc_subdir_lock; 181extern spinlock_t proc_subdir_lock;
144 182
145struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int); 183extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
146int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); 184extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
147unsigned long task_vsize(struct mm_struct *); 185 struct dentry *);
148unsigned long task_statm(struct mm_struct *, 186extern int proc_readdir(struct file *, void *, filldir_t);
149 unsigned long *, unsigned long *, unsigned long *, unsigned long *); 187extern int proc_readdir_de(struct proc_dir_entry *, struct file *, void *, filldir_t);
150void task_mem(struct seq_file *, struct mm_struct *);
151 188
152static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) 189static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
153{ 190{
154 atomic_inc(&pde->count); 191 atomic_inc(&pde->count);
155 return pde; 192 return pde;
156} 193}
157void pde_put(struct proc_dir_entry *pde); 194extern void pde_put(struct proc_dir_entry *);
158
159int proc_fill_super(struct super_block *);
160struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
161int proc_remount(struct super_block *sb, int *flags, char *data);
162 195
163/* 196/*
164 * These are generic /proc routines that use the internal 197 * inode.c
165 * "struct proc_dir_entry" tree to traverse the filesystem.
166 *
167 * The /proc root directory has extended versions to take care
168 * of the /proc/<pid> subdirectories.
169 */ 198 */
170int proc_readdir(struct file *, void *, filldir_t); 199struct pde_opener {
171struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); 200 struct file *file;
201 struct list_head lh;
202 int closing;
203 struct completion *c;
204};
172 205
206extern const struct inode_operations proc_pid_link_inode_operations;
173 207
208extern void proc_init_inodecache(void);
209extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
210extern int proc_fill_super(struct super_block *);
211extern void proc_entry_rundown(struct proc_dir_entry *);
174 212
175/* Lookups */ 213/*
176typedef struct dentry *instantiate_t(struct inode *, struct dentry *, 214 * proc_devtree.c
177 struct task_struct *, const void *); 215 */
178int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, 216#ifdef CONFIG_PROC_DEVICETREE
179 const char *name, int len, 217extern void proc_device_tree_init(void);
180 instantiate_t instantiate, struct task_struct *task, const void *ptr); 218#endif
181int pid_revalidate(struct dentry *dentry, unsigned int flags);
182struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task);
183extern const struct dentry_operations pid_dentry_operations;
184int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
185int proc_setattr(struct dentry *dentry, struct iattr *attr);
186 219
220/*
221 * proc_namespaces.c
222 */
187extern const struct inode_operations proc_ns_dir_inode_operations; 223extern const struct inode_operations proc_ns_dir_inode_operations;
188extern const struct file_operations proc_ns_dir_operations; 224extern const struct file_operations proc_ns_dir_operations;
189 225
226/*
227 * proc_net.c
228 */
229extern const struct file_operations proc_net_operations;
230extern const struct inode_operations proc_net_inode_operations;
231
232#ifdef CONFIG_NET
233extern int proc_net_init(void);
234#else
235static inline int proc_net_init(void) { return 0; }
236#endif
237
238/*
239 * proc_self.c
240 */
241extern int proc_setup_self(struct super_block *);
242
243/*
244 * proc_sysctl.c
245 */
246#ifdef CONFIG_PROC_SYSCTL
247extern int proc_sys_init(void);
248extern void sysctl_head_put(struct ctl_table_header *);
249#else
250static inline void proc_sys_init(void) { }
251static inline void sysctl_head_put(struct ctl_table_header *head) { }
252#endif
253
254/*
255 * proc_tty.c
256 */
257#ifdef CONFIG_TTY
258extern void proc_tty_init(void);
259#else
260static inline void proc_tty_init(void) {}
261#endif
262
263/*
264 * root.c
265 */
266extern struct proc_dir_entry proc_root;
267
268extern void proc_self_init(void);
269extern int proc_remount(struct super_block *, int *, char *);
270
271/*
272 * task_[no]mmu.c
273 */
274struct proc_maps_private {
275 struct pid *pid;
276 struct task_struct *task;
277#ifdef CONFIG_MMU
278 struct vm_area_struct *tail_vma;
279#endif
280#ifdef CONFIG_NUMA
281 struct mempolicy *task_mempolicy;
282#endif
283};
284
285extern const struct file_operations proc_pid_maps_operations;
286extern const struct file_operations proc_tid_maps_operations;
287extern const struct file_operations proc_pid_numa_maps_operations;
288extern const struct file_operations proc_tid_numa_maps_operations;
289extern const struct file_operations proc_pid_smaps_operations;
290extern const struct file_operations proc_tid_smaps_operations;
291extern const struct file_operations proc_clear_refs_operations;
292extern const struct file_operations proc_pagemap_operations;
293
294extern unsigned long task_vsize(struct mm_struct *);
295extern unsigned long task_statm(struct mm_struct *,
296 unsigned long *, unsigned long *,
297 unsigned long *, unsigned long *);
298extern void task_mem(struct seq_file *, struct mm_struct *);
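
struct proc_dir_entry, struct proc_inode and the PROC_I()/PDE()/__PDE_DATA() accessors are now private to fs/proc, so code outside procfs has to use the exported PDE_DATA() and proc_get_parent_data() instead of reaching into the structures. The accessors themselves are the usual embedded-inode container_of() pattern, shown here in generic form with hypothetical foo names:

#include <linux/fs.h>
#include <linux/kernel.h>       /* container_of() */

struct foo_inode {
        void            *payload;       /* filesystem-private per-inode state */
        struct inode    vfs_inode;      /* embedded VFS inode */
};

static inline struct foo_inode *FOO_I(const struct inode *inode)
{
        return container_of(inode, struct foo_inode, vfs_inode);
}

static inline void *foo_inode_data(const struct inode *inode)
{
        return FOO_I(inode)->payload;
}
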
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index f6a13f489e30..0a22194e5d58 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -11,6 +11,7 @@
11 11
12#include <linux/mm.h> 12#include <linux/mm.h>
13#include <linux/proc_fs.h> 13#include <linux/proc_fs.h>
14#include <linux/kcore.h>
14#include <linux/user.h> 15#include <linux/user.h>
15#include <linux/capability.h> 16#include <linux/capability.h>
16#include <linux/elf.h> 17#include <linux/elf.h>
@@ -28,6 +29,7 @@
28#include <linux/ioport.h> 29#include <linux/ioport.h>
29#include <linux/memory.h> 30#include <linux/memory.h>
30#include <asm/sections.h> 31#include <asm/sections.h>
32#include "internal.h"
31 33
32#define CORE_STR "CORE" 34#define CORE_STR "CORE"
33 35
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 66b51c0383da..54bdc6701e9f 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -51,7 +51,7 @@ static int ns_delete_dentry(const struct dentry *dentry)
51static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) 51static char *ns_dname(struct dentry *dentry, char *buffer, int buflen)
52{ 52{
53 struct inode *inode = dentry->d_inode; 53 struct inode *inode = dentry->d_inode;
54 const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; 54 const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops;
55 55
56 return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", 56 return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]",
57 ns_ops->name, inode->i_ino); 57 ns_ops->name, inode->i_ino);
@@ -95,8 +95,8 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb,
95 inode->i_op = &ns_inode_operations; 95 inode->i_op = &ns_inode_operations;
96 inode->i_mode = S_IFREG | S_IRUGO; 96 inode->i_mode = S_IFREG | S_IRUGO;
97 inode->i_fop = &ns_file_operations; 97 inode->i_fop = &ns_file_operations;
98 ei->ns_ops = ns_ops; 98 ei->ns.ns_ops = ns_ops;
99 ei->ns = ns; 99 ei->ns.ns = ns;
100 unlock_new_inode(inode); 100 unlock_new_inode(inode);
101 } else { 101 } else {
102 ns_ops->put(ns); 102 ns_ops->put(ns);
@@ -128,7 +128,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd)
128 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 128 if (!ptrace_may_access(task, PTRACE_MODE_READ))
129 goto out_put_task; 129 goto out_put_task;
130 130
131 ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); 131 ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops);
132 if (IS_ERR(ns_path.dentry)) { 132 if (IS_ERR(ns_path.dentry)) {
133 error = ERR_CAST(ns_path.dentry); 133 error = ERR_CAST(ns_path.dentry);
134 goto out_put_task; 134 goto out_put_task;
@@ -148,7 +148,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl
148{ 148{
149 struct inode *inode = dentry->d_inode; 149 struct inode *inode = dentry->d_inode;
150 struct proc_inode *ei = PROC_I(inode); 150 struct proc_inode *ei = PROC_I(inode);
151 const struct proc_ns_operations *ns_ops = ei->ns_ops; 151 const struct proc_ns_operations *ns_ops = ei->ns.ns_ops;
152 struct task_struct *task; 152 struct task_struct *task;
153 void *ns; 153 void *ns;
154 char name[50]; 154 char name[50];
@@ -202,7 +202,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
202 ei = PROC_I(inode); 202 ei = PROC_I(inode);
203 inode->i_mode = S_IFLNK|S_IRWXUGO; 203 inode->i_mode = S_IFLNK|S_IRWXUGO;
204 inode->i_op = &proc_ns_link_inode_operations; 204 inode->i_op = &proc_ns_link_inode_operations;
205 ei->ns_ops = ns_ops; 205 ei->ns.ns_ops = ns_ops;
206 206
207 d_set_d_op(dentry, &pid_dentry_operations); 207 d_set_d_op(dentry, &pid_dentry_operations);
208 d_add(dentry, inode); 208 d_add(dentry, inode);
@@ -337,6 +337,11 @@ out_invalid:
337 return ERR_PTR(-EINVAL); 337 return ERR_PTR(-EINVAL);
338} 338}
339 339
340struct proc_ns *get_proc_ns(struct inode *inode)
341{
342 return &PROC_I(inode)->ns;
343}
344
340bool proc_ns_inode(struct inode *inode) 345bool proc_ns_inode(struct inode *inode)
341{ 346{
342 return inode->i_fop == &ns_file_operations; 347 return inode->i_fop == &ns_file_operations;
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 30b590f5bd35..505afc950e0a 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -41,7 +41,7 @@ static int property_proc_show(struct seq_file *m, void *v)
41 41
42static int property_proc_open(struct inode *inode, struct file *file) 42static int property_proc_open(struct inode *inode, struct file *file)
43{ 43{
44 return single_open(file, property_proc_show, PDE(inode)->data); 44 return single_open(file, property_proc_show, __PDE_DATA(inode));
45} 45}
46 46
47static const struct file_operations property_proc_fops = { 47static const struct file_operations property_proc_fops = {
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index b4ac6572474f..986e83220d56 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -26,6 +26,10 @@
26 26
27#include "internal.h" 27#include "internal.h"
28 28
29static inline struct net *PDE_NET(struct proc_dir_entry *pde)
30{
31 return pde->parent->data;
32}
29 33
30static struct net *get_proc_net(const struct inode *inode) 34static struct net *get_proc_net(const struct inode *inode)
31{ 35{
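
PDE_NET() is now a one-line helper private to proc_net.c: the struct net sits in the parent directory's ->data (which is what the proc_net_mkdir() removed from generic.c above did with ent->data = net), so entries below a per-net directory recover it from their parent. Outside procfs the equivalent lookup is the exported proc_get_parent_data(); roughly, for a hypothetical file created under such a directory (struct net from net/net_namespace.h):

static int bar_show(struct seq_file *m, void *v)
{
        struct net *net = m->private;

        seq_printf(m, "net %p\n", net);
        return 0;
}

static int bar_open(struct inode *inode, struct file *file)
{
        /* the parent directory's ->data, not this entry's own */
        return single_open(file, bar_show, proc_get_parent_data(inode));
}
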
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 9c7fab1d23f0..41a6ea93f486 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -141,6 +141,8 @@ static void proc_kill_sb(struct super_block *sb)
141 struct pid_namespace *ns; 141 struct pid_namespace *ns;
142 142
143 ns = (struct pid_namespace *)sb->s_fs_info; 143 ns = (struct pid_namespace *)sb->s_fs_info;
144 if (ns->proc_self)
145 dput(ns->proc_self);
144 kill_anon_super(sb); 146 kill_anon_super(sb);
145 put_pid_ns(ns); 147 put_pid_ns(ns);
146} 148}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index aa5cc3bff140..6b6a993b5c25 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -1,6 +1,8 @@
1#include <linux/proc_fs.h>
2#include <linux/sched.h> 1#include <linux/sched.h>
3#include <linux/namei.h> 2#include <linux/namei.h>
3#include <linux/slab.h>
4#include <linux/pid_namespace.h>
5#include "internal.h"
4 6
5/* 7/*
6 * /proc/self: 8 * /proc/self:
@@ -48,12 +50,43 @@ static const struct inode_operations proc_self_inode_operations = {
48 .put_link = proc_self_put_link, 50 .put_link = proc_self_put_link,
49}; 51};
50 52
51void __init proc_self_init(void) 53static unsigned self_inum;
54
55int proc_setup_self(struct super_block *s)
52{ 56{
53 struct proc_dir_entry *proc_self_symlink; 57 struct inode *root_inode = s->s_root->d_inode;
54 mode_t mode; 58 struct pid_namespace *ns = s->s_fs_info;
59 struct dentry *self;
60
61 mutex_lock(&root_inode->i_mutex);
62 self = d_alloc_name(s->s_root, "self");
63 if (self) {
64 struct inode *inode = new_inode_pseudo(s);
65 if (inode) {
66 inode->i_ino = self_inum;
67 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
68 inode->i_mode = S_IFLNK | S_IRWXUGO;
69 inode->i_uid = GLOBAL_ROOT_UID;
70 inode->i_gid = GLOBAL_ROOT_GID;
71 inode->i_op = &proc_self_inode_operations;
72 d_add(self, inode);
73 } else {
74 dput(self);
75 self = ERR_PTR(-ENOMEM);
76 }
77 } else {
78 self = ERR_PTR(-ENOMEM);
79 }
80 mutex_unlock(&root_inode->i_mutex);
81 if (IS_ERR(self)) {
82 pr_err("proc_fill_super: can't allocate /proc/self\n");
83 return PTR_ERR(self);
84 }
85 ns->proc_self = self;
86 return 0;
87}
55 88
56 mode = S_IFLNK | S_IRWXUGO; 89void __init proc_self_init(void)
57 proc_self_symlink = proc_create("self", mode, NULL, NULL ); 90{
58 proc_self_symlink->proc_iops = &proc_self_inode_operations; 91 proc_alloc_inum(&self_inum);
59} 92}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index b870f740ab5a..17f7e080d7ff 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -8,7 +8,7 @@
8 */ 8 */
9 9
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/proc_fs.h> 11#include <linux/kcore.h>
12#include <linux/user.h> 12#include <linux/user.h>
13#include <linux/elf.h> 13#include <linux/elf.h>
14#include <linux/elfcore.h> 14#include <linux/elfcore.h>
@@ -22,6 +22,7 @@
22#include <linux/list.h> 22#include <linux/list.h>
23#include <asm/uaccess.h> 23#include <asm/uaccess.h>
24#include <asm/io.h> 24#include <asm/io.h>
25#include "internal.h"
25 26
26/* List representing chunks of contiguous memory areas and their offsets in 27/* List representing chunks of contiguous memory areas and their offsets in
27 * vmcore file. 28 * vmcore file.
@@ -698,7 +699,7 @@ void vmcore_cleanup(void)
698 struct list_head *pos, *next; 699 struct list_head *pos, *next;
699 700
700 if (proc_vmcore) { 701 if (proc_vmcore) {
701 remove_proc_entry(proc_vmcore->name, proc_vmcore->parent); 702 proc_remove(proc_vmcore);
702 proc_vmcore = NULL; 703 proc_vmcore = NULL;
703 } 704 }
704 705
diff --git a/fs/read_write.c b/fs/read_write.c
index 8274a794253b..605dbbcb1973 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -459,6 +459,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
459 ret = rw_verify_area(WRITE, file, pos, count); 459 ret = rw_verify_area(WRITE, file, pos, count);
460 if (ret >= 0) { 460 if (ret >= 0) {
461 count = ret; 461 count = ret;
462 file_start_write(file);
462 if (file->f_op->write) 463 if (file->f_op->write)
463 ret = file->f_op->write(file, buf, count, pos); 464 ret = file->f_op->write(file, buf, count, pos);
464 else 465 else
@@ -468,6 +469,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
468 add_wchar(current, ret); 469 add_wchar(current, ret);
469 } 470 }
470 inc_syscw(current); 471 inc_syscw(current);
472 file_end_write(file);
471 } 473 }
472 474
473 return ret; 475 return ret;
@@ -576,7 +578,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
576} 578}
577EXPORT_SYMBOL(iov_shorten); 579EXPORT_SYMBOL(iov_shorten);
578 580
579ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 581static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
580 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 582 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
581{ 583{
582 struct kiocb kiocb; 584 struct kiocb kiocb;
@@ -601,7 +603,7 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 }
 
 /* Do it by hand, with file-ops */
-ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
+static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 		unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
 {
 	struct iovec *vector = iov;
@@ -743,6 +745,7 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	} else {
 		fn = (io_fn_t)file->f_op->write;
 		fnv = file->f_op->aio_write;
+		file_start_write(file);
 	}
 
 	if (fnv)
@@ -751,6 +754,9 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	else
 		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
 
+	if (type != READ)
+		file_end_write(file);
+
 out:
 	if (iov != iovstack)
 		kfree(iov);
@@ -881,6 +887,201 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+
+static ssize_t compat_do_readv_writev(int type, struct file *file,
+			       const struct compat_iovec __user *uvector,
+			       unsigned long nr_segs, loff_t *pos)
+{
+	compat_ssize_t tot_len;
+	struct iovec iovstack[UIO_FASTIOV];
+	struct iovec *iov = iovstack;
+	ssize_t ret;
+	io_fn_t fn;
+	iov_fn_t fnv;
+
+	ret = -EINVAL;
+	if (!file->f_op)
+		goto out;
+
+	ret = -EFAULT;
+	if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
+		goto out;
+
+	ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
+					   UIO_FASTIOV, iovstack, &iov);
+	if (ret <= 0)
+		goto out;
+
+	tot_len = ret;
+	ret = rw_verify_area(type, file, pos, tot_len);
+	if (ret < 0)
+		goto out;
+
+	fnv = NULL;
+	if (type == READ) {
+		fn = file->f_op->read;
+		fnv = file->f_op->aio_read;
+	} else {
+		fn = (io_fn_t)file->f_op->write;
+		fnv = file->f_op->aio_write;
+		file_start_write(file);
+	}
+
+	if (fnv)
+		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
+					   pos, fnv);
+	else
+		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);
+
+	if (type != READ)
+		file_end_write(file);
+
+out:
+	if (iov != iovstack)
+		kfree(iov);
+	if ((ret + (type == READ)) > 0) {
+		if (type == READ)
+			fsnotify_access(file);
+		else
+			fsnotify_modify(file);
+	}
+	return ret;
+}
+
+static size_t compat_readv(struct file *file,
+			   const struct compat_iovec __user *vec,
+			   unsigned long vlen, loff_t *pos)
+{
+	ssize_t ret = -EBADF;
+
+	if (!(file->f_mode & FMODE_READ))
+		goto out;
+
+	ret = -EINVAL;
+	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
+		goto out;
+
+	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
+
+out:
+	if (ret > 0)
+		add_rchar(current, ret);
+	inc_syscr(current);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen)
+{
+	struct fd f = fdget(fd);
+	ssize_t ret;
+	loff_t pos;
+
+	if (!f.file)
+		return -EBADF;
+	pos = f.file->f_pos;
+	ret = compat_readv(f.file, vec, vlen, &pos);
+	f.file->f_pos = pos;
+	fdput(f);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen, loff_t, pos)
+{
+	struct fd f;
+	ssize_t ret;
+
+	if (pos < 0)
+		return -EINVAL;
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+	ret = -ESPIPE;
+	if (f.file->f_mode & FMODE_PREAD)
+		ret = compat_readv(f.file, vec, vlen, &pos);
+	fdput(f);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen, u32, pos_low, u32, pos_high)
+{
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+	return compat_sys_preadv64(fd, vec, vlen, pos);
+}
+
+static size_t compat_writev(struct file *file,
+			    const struct compat_iovec __user *vec,
+			    unsigned long vlen, loff_t *pos)
+{
+	ssize_t ret = -EBADF;
+
+	if (!(file->f_mode & FMODE_WRITE))
+		goto out;
+
+	ret = -EINVAL;
+	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
+		goto out;
+
+	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
+
+out:
+	if (ret > 0)
+		add_wchar(current, ret);
+	inc_syscw(current);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd,
+		const struct compat_iovec __user *, vec,
+		unsigned long, vlen)
+{
+	struct fd f = fdget(fd);
+	ssize_t ret;
+	loff_t pos;
+
+	if (!f.file)
+		return -EBADF;
+	pos = f.file->f_pos;
+	ret = compat_writev(f.file, vec, vlen, &pos);
+	f.file->f_pos = pos;
+	fdput(f);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen, loff_t, pos)
+{
+	struct fd f;
+	ssize_t ret;
+
+	if (pos < 0)
+		return -EINVAL;
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+	ret = -ESPIPE;
+	if (f.file->f_mode & FMODE_PWRITE)
+		ret = compat_writev(f.file, vec, vlen, &pos);
+	fdput(f);
+	return ret;
+}
+
+COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd,
+		const struct compat_iovec __user *,vec,
+		unsigned long, vlen, u32, pos_low, u32, pos_high)
+{
+	loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+	return compat_sys_pwritev64(fd, vec, vlen, pos);
+}
+#endif
+
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
 			   size_t count, loff_t max)
 {
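
A recurring change in this file (and in fs/splice.c and fs/xfs/xfs_file.c below) is that freeze protection is now taken at the VFS layer around the actual ->write call, via file_start_write()/file_end_write(), instead of each path calling sb_start_write()/sb_end_write() itself. A condensed sketch of the pairing, assuming a regular file (this mirrors the vfs_write() hunk above rather than adding anything new):

/* Sketch only: the shape of the freeze-protection pairing applied above.
 * file_start_write()/file_end_write() expand to sb_start_write()/sb_end_write()
 * for regular files and are no-ops for everything else. */
static ssize_t write_with_freeze_protection(struct file *file,
					    const char __user *buf,
					    size_t count, loff_t *pos)
{
	ssize_t ret;

	file_start_write(file);		/* blocks while the filesystem is frozen */
	if (file->f_op->write)
		ret = file->f_op->write(file, buf, count, pos);
	else
		ret = do_sync_write(file, buf, count, pos);
	file_end_write(file);		/* matching sb_end_write() */
	return ret;
}
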
diff --git a/fs/read_write.h b/fs/read_write.h
index d07b954c6e0c..0ec530d9305b 100644
--- a/fs/read_write.h
+++ b/fs/read_write.h
@@ -7,8 +7,3 @@
 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *,
 		unsigned long, loff_t);
-
-ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
-		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn);
-ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
-		unsigned long nr_segs, loff_t *ppos, io_fn_t fn);
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 6165bd4784f6..dcaafcfc23b0 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -234,68 +234,9 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
 	return ret;
 }
 
-/* Write @count bytes at position @ppos in a file indicated by @file
-   from the buffer @buf.
-
-   generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want
-   something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was
-   written for (ext2/3). This is for several reasons:
-
-   * It has no understanding of any filesystem specific optimizations.
-
-   * It enters the filesystem repeatedly for each page that is written.
-
-   * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key
-   * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time
-   * to reiserfs which allows for fewer tree traversals.
-
-   * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks.
-
-   * Asking the block allocation code for blocks one at a time is slightly less efficient.
-
-   All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to
-   use it, but we were in a hurry to make code freeze, and so it couldn't be revised then. This new code should make
-   things right finally.
-
-   Future Features: providing search_by_key with hints.
-
-*/
-static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */
-				   const char __user * buf, /* pointer to user supplied data
-							       (in userspace) */
-				   size_t count, /* amount of bytes to write */
-				   loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to
-						  * new current position before returning. */
-				   )
-{
-	struct inode *inode = file_inode(file); // Inode of the file that we are writing to.
-	/* To simplify coding at this time, we store
-	   locked pages in array for now */
-	struct reiserfs_transaction_handle th;
-	th.t_trans_id = 0;
-
-	/* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items
-	 * lying around (most of the disk, in fact). Despite the filesystem
-	 * now being a v3.6 format, the old items still can't support large
-	 * file sizes. Catch this case here, as the rest of the VFS layer is
-	 * oblivious to the different limitations between old and new items.
-	 * reiserfs_setattr catches this for truncates. This chunk is lifted
-	 * from generic_write_checks. */
-	if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 &&
-	    *ppos + count > MAX_NON_LFS) {
-		if (*ppos >= MAX_NON_LFS) {
-			return -EFBIG;
-		}
-		if (count > MAX_NON_LFS - (unsigned long)*ppos)
-			count = MAX_NON_LFS - (unsigned long)*ppos;
-	}
-
-	return do_sync_write(file, buf, count, ppos);
-}
-
 const struct file_operations reiserfs_file_operations = {
 	.read = do_sync_read,
-	.write = reiserfs_file_write,
+	.write = do_sync_write,
 	.unlocked_ioctl = reiserfs_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = reiserfs_compat_ioctl,
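
With reiserfs_file_write() gone, reiserfs writes go through the stock do_sync_write() helper, which wraps the user buffer in an iovec plus a synchronous kiocb and calls ->aio_write() (generic_file_aio_write() in reiserfs's case). Roughly, and leaving out some kiocb bookkeeping, that helper looks like the sketch below (a sketch of the era's generic path, not the verbatim fs/read_write.c code):

/* Approximate shape of do_sync_write() circa this kernel version. */
static ssize_t sync_write_sketch(struct file *filp, const char __user *buf,
				 size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);		/* synchronous kiocb, no completion callback */
	kiocb.ki_pos = *ppos;

	ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
	if (ret == -EIOCBQUEUED)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}
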
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 9cc0740adffa..33532f79b4f7 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -394,20 +394,24 @@ static int set_sb(struct super_block *sb, void *data)
 	return -ENOENT;
 }
 
+struct reiserfs_seq_private {
+	struct super_block *sb;
+	int (*show) (struct seq_file *, struct super_block *);
+};
+
 static void *r_start(struct seq_file *m, loff_t * pos)
 {
-	struct proc_dir_entry *de = m->private;
-	struct super_block *s = de->parent->data;
+	struct reiserfs_seq_private *priv = m->private;
 	loff_t l = *pos;
 
 	if (l)
 		return NULL;
 
-	if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s)))
+	if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb)))
 		return NULL;
 
-	up_write(&s->s_umount);
-	return s;
+	up_write(&priv->sb->s_umount);
+	return priv->sb;
 }
 
 static void *r_next(struct seq_file *m, void *v, loff_t * pos)
412 416
413static void *r_next(struct seq_file *m, void *v, loff_t * pos) 417static void *r_next(struct seq_file *m, void *v, loff_t * pos)
@@ -426,9 +430,8 @@ static void r_stop(struct seq_file *m, void *v)
 
 static int r_show(struct seq_file *m, void *v)
 {
-	struct proc_dir_entry *de = m->private;
-	int (*show) (struct seq_file *, struct super_block *) = de->data;
-	return show(m, v);
+	struct reiserfs_seq_private *priv = m->private;
+	return priv->show(m, v);
 }
 
 static const struct seq_operations r_ops = {
@@ -440,11 +443,15 @@ static const struct seq_operations r_ops = {
 
 static int r_open(struct inode *inode, struct file *file)
 {
-	int ret = seq_open(file, &r_ops);
+	struct reiserfs_seq_private *priv;
+	int ret = seq_open_private(file, &r_ops,
+				   sizeof(struct reiserfs_seq_private));
 
 	if (!ret) {
 		struct seq_file *m = file->private_data;
-		m->private = PDE(inode);
+		priv = m->private;
+		priv->sb = proc_get_parent_data(inode);
+		priv->show = PDE_DATA(inode);
 	}
 	return ret;
 }
@@ -453,7 +460,7 @@ static const struct file_operations r_file_operations = {
 	.open = r_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = seq_release_private,
 	.owner = THIS_MODULE,
 };
 
@@ -479,9 +486,8 @@ int reiserfs_proc_info_init(struct super_block *sb)
 		*s = '!';
 
 	spin_lock_init(&__PINFO(sb).lock);
-	REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root);
+	REISERFS_SB(sb)->procdir = proc_mkdir_data(b, 0, proc_info_root, sb);
 	if (REISERFS_SB(sb)->procdir) {
-		REISERFS_SB(sb)->procdir->data = sb;
 		add_file(sb, "version", show_version);
 		add_file(sb, "super", show_super);
 		add_file(sb, "per-level", show_per_level);
@@ -499,29 +505,17 @@ int reiserfs_proc_info_init(struct super_block *sb)
 int reiserfs_proc_info_done(struct super_block *sb)
 {
 	struct proc_dir_entry *de = REISERFS_SB(sb)->procdir;
-	char b[BDEVNAME_SIZE];
-	char *s;
+	if (de) {
+		char b[BDEVNAME_SIZE];
+		char *s;
 
-	/* Some block devices use /'s */
-	strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
-	s = strchr(b, '/');
-	if (s)
-		*s = '!';
+		/* Some block devices use /'s */
+		strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE);
+		s = strchr(b, '/');
+		if (s)
+			*s = '!';
 
-	if (de) {
-		remove_proc_entry("journal", de);
-		remove_proc_entry("oidmap", de);
-		remove_proc_entry("on-disk-super", de);
-		remove_proc_entry("bitmap", de);
-		remove_proc_entry("per-level", de);
-		remove_proc_entry("super", de);
-		remove_proc_entry("version", de);
-	}
-	spin_lock(&__PINFO(sb).lock);
-	__PINFO(sb).exiting = 1;
-	spin_unlock(&__PINFO(sb).lock);
-	if (proc_info_root) {
-		remove_proc_entry(b, proc_info_root);
+		remove_proc_subtree(b, proc_info_root);
 		REISERFS_SB(sb)->procdir = NULL;
 	}
 	return 0;
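
The reiserfs conversion above shows the intended replacement for stashing private state in struct proc_dir_entry fields: attach the data when the entry is created (proc_mkdir_data()/proc_create_data()), fetch it at open time with PDE_DATA()/proc_get_parent_data(), and keep per-open state in a seq_file private allocated by seq_open_private(). A stripped-down sketch of the simpler single_open() variant of that pattern, for a hypothetical subsystem (names are illustrative):

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Hypothetical per-device state handed to procfs at creation time. */
struct foo_device {
	const char *name;
	unsigned long hits;
};

static int foo_proc_show(struct seq_file *m, void *v)
{
	struct foo_device *dev = m->private;

	seq_printf(m, "%s: %lu\n", dev->name, dev->hits);
	return 0;
}

static int foo_proc_open(struct inode *inode, struct file *file)
{
	/* PDE_DATA() replaces reaching into PDE(inode)->data directly. */
	return single_open(file, foo_proc_show, PDE_DATA(inode));
}

static const struct file_operations foo_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = foo_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};

static struct proc_dir_entry *foo_proc_init(struct foo_device *dev)
{
	/* The data pointer is attached at creation instead of being
	 * poked into the proc_dir_entry afterwards. */
	return proc_create_data(dev->name, 0444, NULL, &foo_proc_fops, dev);
}
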
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 38bb59f3f2ad..774c1eb7f1c9 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -599,6 +599,24 @@ int single_open(struct file *file, int (*show)(struct seq_file *, void *),
 }
 EXPORT_SYMBOL(single_open);
 
+int single_open_size(struct file *file, int (*show)(struct seq_file *, void *),
+		void *data, size_t size)
+{
+	char *buf = kmalloc(size, GFP_KERNEL);
+	int ret;
+	if (!buf)
+		return -ENOMEM;
+	ret = single_open(file, show, data);
+	if (ret) {
+		kfree(buf);
+		return ret;
+	}
+	((struct seq_file *)file->private_data)->buf = buf;
+	((struct seq_file *)file->private_data)->size = size;
+	return 0;
+}
+EXPORT_SYMBOL(single_open_size);
+
 int single_release(struct inode *inode, struct file *file)
 {
 	const struct seq_operations *op = ((struct seq_file *)file->private_data)->op;
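
single_open_size() is aimed at single-shot files whose output is known to be large: it preallocates the seq_file buffer up front so the first read does not have to go through seq_file's allocate, overflow, double-and-retry cycle. A minimal usage sketch (hypothetical file and size estimate):

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

static int big_dump_show(struct seq_file *m, void *v)
{
	/* ...emit a few hundred KB of state with seq_printf()/seq_write()... */
	return 0;
}

static int big_dump_open(struct inode *inode, struct file *file)
{
	/* The size is only a hint; seq_file still grows the buffer if the
	 * show() callback overflows it. */
	return single_open_size(file, big_dump_show, PDE_DATA(inode),
				256 * 1024);
}

static const struct file_operations big_dump_fops = {
	.owner	 = THIS_MODULE,
	.open	 = big_dump_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
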
diff --git a/fs/splice.c b/fs/splice.c
index 6b485b8753bd..e6b25598c8c4 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -219,7 +219,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
 		page_nr++;
 		ret += buf->len;
 
-		if (pipe->inode)
+		if (pipe->files)
 			do_wakeup = 1;
 
 		if (!--spd->nr_pages)
@@ -829,7 +829,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,
 			ops->release(pipe, buf);
 			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
 			pipe->nrbufs--;
-			if (pipe->inode)
+			if (pipe->files)
 				sd->need_wakeup = true;
 		}
 
@@ -1001,8 +1001,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 	};
 	ssize_t ret;
 
-	sb_start_write(inode->i_sb);
-
 	pipe_lock(pipe);
 
 	splice_from_pipe_begin(&sd);
@@ -1038,7 +1036,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 		*ppos += ret;
 		balance_dirty_pages_ratelimited(mapping);
 	}
-	sb_end_write(inode->i_sb);
 
 	return ret;
 }
@@ -1118,7 +1115,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 	else
 		splice_write = default_file_splice_write;
 
-	return splice_write(pipe, out, ppos, len, flags);
+	file_start_write(out);
+	ret = splice_write(pipe, out, ppos, len, flags);
+	file_end_write(out);
+	return ret;
 }
 
 /*
@@ -1184,7 +1184,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
 	 */
 	pipe = current->splice_pipe;
 	if (unlikely(!pipe)) {
-		pipe = alloc_pipe_info(NULL);
+		pipe = alloc_pipe_info();
 		if (!pipe)
 			return -ENOMEM;
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f03bf1a456fb..3800128d2171 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -775,8 +775,6 @@ xfs_file_aio_write(
 	if (ocount == 0)
 		return 0;
 
-	sb_start_write(inode->i_sb);
-
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
 		ret = -EIO;
 		goto out;
@@ -800,7 +798,6 @@ xfs_file_aio_write(
 	}
 
 out:
-	sb_end_write(inode->i_sb);
 	return ret;
 }
 