author     Linus Torvalds <torvalds@linux-foundation.org>   2013-05-01 20:51:54 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-05-01 20:51:54 -0400
commit     20b4fb485227404329e41ad15588afad3df23050 (patch)
tree       f3e099f0ab3da8a93b447203e294d2bb22f6dc05 /fs
parent     b9394d8a657cd3c064fa432aa0905c1b58b38fe9 (diff)
parent     ac3e3c5b1164397656df81b9e9ab4991184d3236 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull VFS updates from Al Viro:
Misc cleanups all over the place, mainly with respect to the /proc
interfaces (switch create_proc_entry() to proc_create(), get rid of the
deprecated create_proc_read_entry() in favor of proc_create_data() and
seq_file, etc).

Roughly 7,000 lines of code removed.
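A minimal sketch of the pattern these conversions move callers to (the foo names and the "foo_stats" entry are illustrative, not taken from any driver in the series): register the entry with proc_create_data() backed by seq_file, and recover the private pointer in ->open() with PDE_DATA(inode) instead of dereferencing PDE(inode)->data.

/*
 * Illustrative only: a seq_file-backed /proc entry using the post-series API.
 */
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

struct foo {
	int counter;
};

static struct foo foo_instance;

static int foo_show(struct seq_file *m, void *v)
{
	struct foo *foo = m->private;	/* handed over by single_open() */

	seq_printf(m, "counter: %d\n", foo->counter);
	return 0;
}

static int foo_open(struct inode *inode, struct file *file)
{
	/* PDE_DATA() replaces the old PDE(inode)->data dereference */
	return single_open(file, foo_show, PDE_DATA(inode));
}

static const struct file_operations foo_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= foo_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init foo_init(void)
{
	/* proc_create_data() attaches the cookie that PDE_DATA() later returns */
	if (!proc_create_data("foo_stats", 0444, NULL, &foo_proc_fops,
			      &foo_instance))
		return -ENOMEM;
	return 0;
}

static void __exit foo_exit(void)
{
	remove_proc_entry("foo_stats", NULL);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");

Keeping accessors like PDE_DATA() as the only public interface is what lets the series move PDE() and the rest of struct proc_dir_entry into fs/proc/internal.h.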
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (204 commits)
don't bother with deferred freeing of fdtables
proc: Move non-public stuff from linux/proc_fs.h to fs/proc/internal.h
proc: Make the PROC_I() and PDE() macros internal to procfs
proc: Supply a function to remove a proc entry by PDE
take cgroup_open() and cpuset_open() to fs/proc/base.c
ppc: Clean up scanlog
ppc: Clean up rtas_flash driver somewhat
hostap: proc: Use remove_proc_subtree()
drm: proc: Use remove_proc_subtree()
drm: proc: Use minor->index to label things, not PDE->name
drm: Constify drm_proc_list[]
zoran: Don't print proc_dir_entry data in debug
reiserfs: Don't access the proc_dir_entry in r_open(), r_start() r_show()
proc: Supply an accessor for getting the data from a PDE's parent
airo: Use remove_proc_subtree()
rtl8192u: Don't need to save device proc dir PDE
rtl8187se: Use a dir under /proc/net/r8180/
proc: Add proc_mkdir_data()
proc: Move some bits from linux/proc_fs.h to linux/{of.h,signal.h,tty.h}
proc: Move PDE_NET() to fs/proc/proc_net.c
...
Diffstat (limited to 'fs')
58 files changed, 1307 insertions, 1613 deletions
diff --git a/fs/Makefile b/fs/Makefile index f0db9c941a5f..4fe6df3ec28f 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | obj-y := open.o read_write.o file_table.o super.o \ | 8 | obj-y := open.o read_write.o file_table.o super.o \ |
9 | char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ | 9 | char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ |
10 | ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ | 10 | ioctl.o readdir.o select.o dcache.o inode.o \ |
11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ |
12 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
13 | pnode.o splice.o sync.o utimes.o \ | 13 | pnode.o splice.o sync.o utimes.o \ |
diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 096b23f821a1..526e4bbbde59 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c | |||
@@ -190,7 +190,7 @@ static int afs_proc_cells_open(struct inode *inode, struct file *file) | |||
190 | return ret; | 190 | return ret; |
191 | 191 | ||
192 | m = file->private_data; | 192 | m = file->private_data; |
193 | m->private = PDE(inode)->data; | 193 | m->private = PDE_DATA(inode); |
194 | 194 | ||
195 | return 0; | 195 | return 0; |
196 | } | 196 | } |
@@ -448,7 +448,7 @@ static int afs_proc_cell_volumes_open(struct inode *inode, struct file *file) | |||
448 | struct seq_file *m; | 448 | struct seq_file *m; |
449 | int ret; | 449 | int ret; |
450 | 450 | ||
451 | cell = PDE(inode)->data; | 451 | cell = PDE_DATA(inode); |
452 | if (!cell) | 452 | if (!cell) |
453 | return -ENOENT; | 453 | return -ENOENT; |
454 | 454 | ||
@@ -554,7 +554,7 @@ static int afs_proc_cell_vlservers_open(struct inode *inode, struct file *file) | |||
554 | struct seq_file *m; | 554 | struct seq_file *m; |
555 | int ret; | 555 | int ret; |
556 | 556 | ||
557 | cell = PDE(inode)->data; | 557 | cell = PDE_DATA(inode); |
558 | if (!cell) | 558 | if (!cell) |
559 | return -ENOENT; | 559 | return -ENOENT; |
560 | 560 | ||
@@ -659,7 +659,7 @@ static int afs_proc_cell_servers_open(struct inode *inode, struct file *file) | |||
659 | struct seq_file *m; | 659 | struct seq_file *m; |
660 | int ret; | 660 | int ret; |
661 | 661 | ||
662 | cell = PDE(inode)->data; | 662 | cell = PDE_DATA(inode); |
663 | if (!cell) | 663 | if (!cell) |
664 | return -ENOENT; | 664 | return -ENOENT; |
665 | 665 | ||
diff --git a/fs/aio.c b/fs/aio.c --- a/fs/aio.c +++ b/fs/aio.c | |||
@@ -1324,6 +1324,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb) | |||
1324 | if (iocb->ki_pos < 0) | 1324 | if (iocb->ki_pos < 0) |
1325 | return -EINVAL; | 1325 | return -EINVAL; |
1326 | 1326 | ||
1327 | if (opcode == IOCB_CMD_PWRITEV) | ||
1328 | file_start_write(file); | ||
1327 | do { | 1329 | do { |
1328 | ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], | 1330 | ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg], |
1329 | iocb->ki_nr_segs - iocb->ki_cur_seg, | 1331 | iocb->ki_nr_segs - iocb->ki_cur_seg, |
@@ -1336,6 +1338,8 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb) | |||
1336 | } while (ret > 0 && iocb->ki_left > 0 && | 1338 | } while (ret > 0 && iocb->ki_left > 0 && |
1337 | (opcode == IOCB_CMD_PWRITEV || | 1339 | (opcode == IOCB_CMD_PWRITEV || |
1338 | (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); | 1340 | (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode)))); |
1341 | if (opcode == IOCB_CMD_PWRITEV) | ||
1342 | file_end_write(file); | ||
1339 | 1343 | ||
1340 | /* This means we must have transferred all that we could */ | 1344 | /* This means we must have transferred all that we could */ |
1341 | /* No need to retry anymore */ | 1345 | /* No need to retry anymore */ |
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 02fe378fc506..bce87694f7b0 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c | |||
@@ -286,15 +286,12 @@ static int load_aout_binary(struct linux_binprm * bprm) | |||
286 | return error; | 286 | return error; |
287 | } | 287 | } |
288 | 288 | ||
289 | error = bprm->file->f_op->read(bprm->file, | 289 | error = read_code(bprm->file, text_addr, pos, |
290 | (char __user *)text_addr, | 290 | ex.a_text+ex.a_data); |
291 | ex.a_text+ex.a_data, &pos); | ||
292 | if ((signed long)error < 0) { | 291 | if ((signed long)error < 0) { |
293 | send_sig(SIGKILL, current, 0); | 292 | send_sig(SIGKILL, current, 0); |
294 | return error; | 293 | return error; |
295 | } | 294 | } |
296 | |||
297 | flush_icache_range(text_addr, text_addr+ex.a_text+ex.a_data); | ||
298 | } else { | 295 | } else { |
299 | if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && | 296 | if ((ex.a_text & 0xfff || ex.a_data & 0xfff) && |
300 | (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) | 297 | (N_MAGIC(ex) != NMAGIC) && printk_ratelimit()) |
@@ -310,14 +307,9 @@ static int load_aout_binary(struct linux_binprm * bprm) | |||
310 | } | 307 | } |
311 | 308 | ||
312 | if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { | 309 | if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { |
313 | loff_t pos = fd_offset; | ||
314 | vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); | 310 | vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); |
315 | bprm->file->f_op->read(bprm->file, | 311 | read_code(bprm->file, N_TXTADDR(ex), fd_offset, |
316 | (char __user *)N_TXTADDR(ex), | 312 | ex.a_text + ex.a_data); |
317 | ex.a_text+ex.a_data, &pos); | ||
318 | flush_icache_range((unsigned long) N_TXTADDR(ex), | ||
319 | (unsigned long) N_TXTADDR(ex) + | ||
320 | ex.a_text+ex.a_data); | ||
321 | goto beyond_if; | 313 | goto beyond_if; |
322 | } | 314 | } |
323 | 315 | ||
@@ -396,8 +388,6 @@ static int load_aout_library(struct file *file) | |||
396 | start_addr = ex.a_entry & 0xfffff000; | 388 | start_addr = ex.a_entry & 0xfffff000; |
397 | 389 | ||
398 | if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { | 390 | if ((N_TXTOFF(ex) & ~PAGE_MASK) != 0) { |
399 | loff_t pos = N_TXTOFF(ex); | ||
400 | |||
401 | if (printk_ratelimit()) | 391 | if (printk_ratelimit()) |
402 | { | 392 | { |
403 | printk(KERN_WARNING | 393 | printk(KERN_WARNING |
@@ -406,11 +396,8 @@ static int load_aout_library(struct file *file) | |||
406 | } | 396 | } |
407 | vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); | 397 | vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); |
408 | 398 | ||
409 | file->f_op->read(file, (char __user *)start_addr, | 399 | read_code(file, start_addr, N_TXTOFF(ex), |
410 | ex.a_text + ex.a_data, &pos); | 400 | ex.a_text + ex.a_data); |
411 | flush_icache_range((unsigned long) start_addr, | ||
412 | (unsigned long) start_addr + ex.a_text + ex.a_data); | ||
413 | |||
414 | retval = 0; | 401 | retval = 0; |
415 | goto out; | 402 | goto out; |
416 | } | 403 | } |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index c1cc06aed601..9dac212fc6f9 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
@@ -926,7 +926,6 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( | |||
926 | struct elf32_fdpic_loadseg *seg; | 926 | struct elf32_fdpic_loadseg *seg; |
927 | struct elf32_phdr *phdr; | 927 | struct elf32_phdr *phdr; |
928 | unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags; | 928 | unsigned long load_addr, base = ULONG_MAX, top = 0, maddr = 0, mflags; |
929 | loff_t fpos; | ||
930 | int loop, ret; | 929 | int loop, ret; |
931 | 930 | ||
932 | load_addr = params->load_addr; | 931 | load_addr = params->load_addr; |
@@ -964,14 +963,12 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( | |||
964 | if (params->phdrs[loop].p_type != PT_LOAD) | 963 | if (params->phdrs[loop].p_type != PT_LOAD) |
965 | continue; | 964 | continue; |
966 | 965 | ||
967 | fpos = phdr->p_offset; | ||
968 | |||
969 | seg->addr = maddr + (phdr->p_vaddr - base); | 966 | seg->addr = maddr + (phdr->p_vaddr - base); |
970 | seg->p_vaddr = phdr->p_vaddr; | 967 | seg->p_vaddr = phdr->p_vaddr; |
971 | seg->p_memsz = phdr->p_memsz; | 968 | seg->p_memsz = phdr->p_memsz; |
972 | 969 | ||
973 | ret = file->f_op->read(file, (void *) seg->addr, | 970 | ret = read_code(file, seg->addr, phdr->p_offset, |
974 | phdr->p_filesz, &fpos); | 971 | phdr->p_filesz); |
975 | if (ret < 0) | 972 | if (ret < 0) |
976 | return ret; | 973 | return ret; |
977 | 974 | ||
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 2036d21baaef..d50bbe59da1e 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -207,11 +207,12 @@ static int decompress_exec( | |||
207 | 207 | ||
208 | /* Read in first chunk of data and parse gzip header. */ | 208 | /* Read in first chunk of data and parse gzip header. */ |
209 | fpos = offset; | 209 | fpos = offset; |
210 | ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); | 210 | ret = kernel_read(bprm->file, offset, buf, LBUFSIZE); |
211 | 211 | ||
212 | strm.next_in = buf; | 212 | strm.next_in = buf; |
213 | strm.avail_in = ret; | 213 | strm.avail_in = ret; |
214 | strm.total_in = 0; | 214 | strm.total_in = 0; |
215 | fpos += ret; | ||
215 | 216 | ||
216 | retval = -ENOEXEC; | 217 | retval = -ENOEXEC; |
217 | 218 | ||
@@ -277,7 +278,7 @@ static int decompress_exec( | |||
277 | } | 278 | } |
278 | 279 | ||
279 | while ((ret = zlib_inflate(&strm, Z_NO_FLUSH)) == Z_OK) { | 280 | while ((ret = zlib_inflate(&strm, Z_NO_FLUSH)) == Z_OK) { |
280 | ret = bprm->file->f_op->read(bprm->file, buf, LBUFSIZE, &fpos); | 281 | ret = kernel_read(bprm->file, fpos, buf, LBUFSIZE); |
281 | if (ret <= 0) | 282 | if (ret <= 0) |
282 | break; | 283 | break; |
283 | len -= ret; | 284 | len -= ret; |
@@ -285,6 +286,7 @@ static int decompress_exec( | |||
285 | strm.next_in = buf; | 286 | strm.next_in = buf; |
286 | strm.avail_in = ret; | 287 | strm.avail_in = ret; |
287 | strm.total_in = 0; | 288 | strm.total_in = 0; |
289 | fpos += ret; | ||
288 | } | 290 | } |
289 | 291 | ||
290 | if (ret < 0) { | 292 | if (ret < 0) { |
@@ -428,6 +430,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
428 | unsigned long textpos = 0, datapos = 0, result; | 430 | unsigned long textpos = 0, datapos = 0, result; |
429 | unsigned long realdatastart = 0; | 431 | unsigned long realdatastart = 0; |
430 | unsigned long text_len, data_len, bss_len, stack_len, flags; | 432 | unsigned long text_len, data_len, bss_len, stack_len, flags; |
433 | unsigned long full_data; | ||
431 | unsigned long len, memp = 0; | 434 | unsigned long len, memp = 0; |
432 | unsigned long memp_size, extra, rlim; | 435 | unsigned long memp_size, extra, rlim; |
433 | unsigned long *reloc = 0, *rp; | 436 | unsigned long *reloc = 0, *rp; |
@@ -451,6 +454,7 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
451 | relocs = ntohl(hdr->reloc_count); | 454 | relocs = ntohl(hdr->reloc_count); |
452 | flags = ntohl(hdr->flags); | 455 | flags = ntohl(hdr->flags); |
453 | rev = ntohl(hdr->rev); | 456 | rev = ntohl(hdr->rev); |
457 | full_data = data_len + relocs * sizeof(unsigned long); | ||
454 | 458 | ||
455 | if (strncmp(hdr->magic, "bFLT", 4)) { | 459 | if (strncmp(hdr->magic, "bFLT", 4)) { |
456 | /* | 460 | /* |
@@ -577,12 +581,12 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
577 | #ifdef CONFIG_BINFMT_ZFLAT | 581 | #ifdef CONFIG_BINFMT_ZFLAT |
578 | if (flags & FLAT_FLAG_GZDATA) { | 582 | if (flags & FLAT_FLAG_GZDATA) { |
579 | result = decompress_exec(bprm, fpos, (char *) datapos, | 583 | result = decompress_exec(bprm, fpos, (char *) datapos, |
580 | data_len + (relocs * sizeof(unsigned long)), 0); | 584 | full_data, 0); |
581 | } else | 585 | } else |
582 | #endif | 586 | #endif |
583 | { | 587 | { |
584 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, | 588 | result = read_code(bprm->file, datapos, fpos, |
585 | data_len + (relocs * sizeof(unsigned long)), &fpos); | 589 | full_data); |
586 | } | 590 | } |
587 | if (IS_ERR_VALUE(result)) { | 591 | if (IS_ERR_VALUE(result)) { |
588 | printk("Unable to read data+bss, errno %d\n", (int)-result); | 592 | printk("Unable to read data+bss, errno %d\n", (int)-result); |
@@ -627,30 +631,25 @@ static int load_flat_file(struct linux_binprm * bprm, | |||
627 | if (flags & FLAT_FLAG_GZIP) { | 631 | if (flags & FLAT_FLAG_GZIP) { |
628 | result = decompress_exec(bprm, sizeof (struct flat_hdr), | 632 | result = decompress_exec(bprm, sizeof (struct flat_hdr), |
629 | (((char *) textpos) + sizeof (struct flat_hdr)), | 633 | (((char *) textpos) + sizeof (struct flat_hdr)), |
630 | (text_len + data_len + (relocs * sizeof(unsigned long)) | 634 | (text_len + full_data |
631 | - sizeof (struct flat_hdr)), | 635 | - sizeof (struct flat_hdr)), |
632 | 0); | 636 | 0); |
633 | memmove((void *) datapos, (void *) realdatastart, | 637 | memmove((void *) datapos, (void *) realdatastart, |
634 | data_len + (relocs * sizeof(unsigned long))); | 638 | full_data); |
635 | } else if (flags & FLAT_FLAG_GZDATA) { | 639 | } else if (flags & FLAT_FLAG_GZDATA) { |
636 | fpos = 0; | 640 | result = read_code(bprm->file, textpos, 0, text_len); |
637 | result = bprm->file->f_op->read(bprm->file, | ||
638 | (char *) textpos, text_len, &fpos); | ||
639 | if (!IS_ERR_VALUE(result)) | 641 | if (!IS_ERR_VALUE(result)) |
640 | result = decompress_exec(bprm, text_len, (char *) datapos, | 642 | result = decompress_exec(bprm, text_len, (char *) datapos, |
641 | data_len + (relocs * sizeof(unsigned long)), 0); | 643 | full_data, 0); |
642 | } | 644 | } |
643 | else | 645 | else |
644 | #endif | 646 | #endif |
645 | { | 647 | { |
646 | fpos = 0; | 648 | result = read_code(bprm->file, textpos, 0, text_len); |
647 | result = bprm->file->f_op->read(bprm->file, | 649 | if (!IS_ERR_VALUE(result)) |
648 | (char *) textpos, text_len, &fpos); | 650 | result = read_code(bprm->file, datapos, |
649 | if (!IS_ERR_VALUE(result)) { | 651 | ntohl(hdr->data_start), |
650 | fpos = ntohl(hdr->data_start); | 652 | full_data); |
651 | result = bprm->file->f_op->read(bprm->file, (char *) datapos, | ||
652 | data_len + (relocs * sizeof(unsigned long)), &fpos); | ||
653 | } | ||
654 | } | 653 | } |
655 | if (IS_ERR_VALUE(result)) { | 654 | if (IS_ERR_VALUE(result)) { |
656 | printk("Unable to read code+data+bss, errno %d\n",(int)-result); | 655 | printk("Unable to read code+data+bss, errno %d\n",(int)-result); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ade03e6f7bd2..bb8b7a0e28a6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1514,8 +1514,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1514 | size_t count, ocount; | 1514 | size_t count, ocount; |
1515 | bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); | 1515 | bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); |
1516 | 1516 | ||
1517 | sb_start_write(inode->i_sb); | ||
1518 | |||
1519 | mutex_lock(&inode->i_mutex); | 1517 | mutex_lock(&inode->i_mutex); |
1520 | 1518 | ||
1521 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | 1519 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); |
@@ -1617,7 +1615,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1617 | if (sync) | 1615 | if (sync) |
1618 | atomic_dec(&BTRFS_I(inode)->sync_writers); | 1616 | atomic_dec(&BTRFS_I(inode)->sync_writers); |
1619 | out: | 1617 | out: |
1620 | sb_end_write(inode->i_sb); | ||
1621 | current->backing_dev_info = NULL; | 1618 | current->backing_dev_info = NULL; |
1622 | return num_written ? num_written : err; | 1619 | return num_written ? num_written : err; |
1623 | } | 1620 | } |
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 480992259707..317f9ee9c991 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c | |||
@@ -962,12 +962,14 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) | |||
962 | } | 962 | } |
963 | 963 | ||
964 | data = kmap(page); | 964 | data = kmap(page); |
965 | file_start_write(file); | ||
965 | old_fs = get_fs(); | 966 | old_fs = get_fs(); |
966 | set_fs(KERNEL_DS); | 967 | set_fs(KERNEL_DS); |
967 | ret = file->f_op->write( | 968 | ret = file->f_op->write( |
968 | file, (const void __user *) data, len, &pos); | 969 | file, (const void __user *) data, len, &pos); |
969 | set_fs(old_fs); | 970 | set_fs(old_fs); |
970 | kunmap(page); | 971 | kunmap(page); |
972 | file_end_write(file); | ||
971 | if (ret != len) | 973 | if (ret != len) |
972 | ret = -EIO; | 974 | ret = -EIO; |
973 | } | 975 | } |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 7a0dd99e4507..2d4a231dd70b 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -2520,8 +2520,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, | |||
2520 | 2520 | ||
2521 | BUG_ON(iocb->ki_pos != pos); | 2521 | BUG_ON(iocb->ki_pos != pos); |
2522 | 2522 | ||
2523 | sb_start_write(inode->i_sb); | ||
2524 | |||
2525 | /* | 2523 | /* |
2526 | * We need to hold the sem to be sure nobody modifies lock list | 2524 | * We need to hold the sem to be sure nobody modifies lock list |
2527 | * with a brlock that prevents writing. | 2525 | * with a brlock that prevents writing. |
@@ -2545,7 +2543,6 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, | |||
2545 | } | 2543 | } |
2546 | 2544 | ||
2547 | up_read(&cinode->lock_sem); | 2545 | up_read(&cinode->lock_sem); |
2548 | sb_end_write(inode->i_sb); | ||
2549 | return rc; | 2546 | return rc; |
2550 | } | 2547 | } |
2551 | 2548 | ||
diff --git a/fs/coda/file.c b/fs/coda/file.c index fa4c100bdc7d..380b798f8443 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c | |||
@@ -79,6 +79,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo | |||
79 | return -EINVAL; | 79 | return -EINVAL; |
80 | 80 | ||
81 | host_inode = file_inode(host_file); | 81 | host_inode = file_inode(host_file); |
82 | file_start_write(host_file); | ||
82 | mutex_lock(&coda_inode->i_mutex); | 83 | mutex_lock(&coda_inode->i_mutex); |
83 | 84 | ||
84 | ret = host_file->f_op->write(host_file, buf, count, ppos); | 85 | ret = host_file->f_op->write(host_file, buf, count, ppos); |
@@ -87,6 +88,7 @@ coda_file_write(struct file *coda_file, const char __user *buf, size_t count, lo | |||
87 | coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; | 88 | coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9; |
88 | coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; | 89 | coda_inode->i_mtime = coda_inode->i_ctime = CURRENT_TIME_SEC; |
89 | mutex_unlock(&coda_inode->i_mutex); | 90 | mutex_unlock(&coda_inode->i_mutex); |
91 | file_end_write(host_file); | ||
90 | 92 | ||
91 | return ret; | 93 | return ret; |
92 | } | 94 | } |
diff --git a/fs/compat.c b/fs/compat.c index 5f83ffa42115..d0560c93973d 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1068,190 +1068,6 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
1068 | } | 1068 | } |
1069 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ | 1069 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ |
1070 | 1070 | ||
1071 | static ssize_t compat_do_readv_writev(int type, struct file *file, | ||
1072 | const struct compat_iovec __user *uvector, | ||
1073 | unsigned long nr_segs, loff_t *pos) | ||
1074 | { | ||
1075 | compat_ssize_t tot_len; | ||
1076 | struct iovec iovstack[UIO_FASTIOV]; | ||
1077 | struct iovec *iov = iovstack; | ||
1078 | ssize_t ret; | ||
1079 | io_fn_t fn; | ||
1080 | iov_fn_t fnv; | ||
1081 | |||
1082 | ret = -EINVAL; | ||
1083 | if (!file->f_op) | ||
1084 | goto out; | ||
1085 | |||
1086 | ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, | ||
1087 | UIO_FASTIOV, iovstack, &iov); | ||
1088 | if (ret <= 0) | ||
1089 | goto out; | ||
1090 | |||
1091 | tot_len = ret; | ||
1092 | ret = rw_verify_area(type, file, pos, tot_len); | ||
1093 | if (ret < 0) | ||
1094 | goto out; | ||
1095 | |||
1096 | fnv = NULL; | ||
1097 | if (type == READ) { | ||
1098 | fn = file->f_op->read; | ||
1099 | fnv = file->f_op->aio_read; | ||
1100 | } else { | ||
1101 | fn = (io_fn_t)file->f_op->write; | ||
1102 | fnv = file->f_op->aio_write; | ||
1103 | } | ||
1104 | |||
1105 | if (fnv) | ||
1106 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, | ||
1107 | pos, fnv); | ||
1108 | else | ||
1109 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); | ||
1110 | |||
1111 | out: | ||
1112 | if (iov != iovstack) | ||
1113 | kfree(iov); | ||
1114 | if ((ret + (type == READ)) > 0) { | ||
1115 | if (type == READ) | ||
1116 | fsnotify_access(file); | ||
1117 | else | ||
1118 | fsnotify_modify(file); | ||
1119 | } | ||
1120 | return ret; | ||
1121 | } | ||
1122 | |||
1123 | static size_t compat_readv(struct file *file, | ||
1124 | const struct compat_iovec __user *vec, | ||
1125 | unsigned long vlen, loff_t *pos) | ||
1126 | { | ||
1127 | ssize_t ret = -EBADF; | ||
1128 | |||
1129 | if (!(file->f_mode & FMODE_READ)) | ||
1130 | goto out; | ||
1131 | |||
1132 | ret = -EINVAL; | ||
1133 | if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) | ||
1134 | goto out; | ||
1135 | |||
1136 | ret = compat_do_readv_writev(READ, file, vec, vlen, pos); | ||
1137 | |||
1138 | out: | ||
1139 | if (ret > 0) | ||
1140 | add_rchar(current, ret); | ||
1141 | inc_syscr(current); | ||
1142 | return ret; | ||
1143 | } | ||
1144 | |||
1145 | asmlinkage ssize_t | ||
1146 | compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec, | ||
1147 | unsigned long vlen) | ||
1148 | { | ||
1149 | struct fd f = fdget(fd); | ||
1150 | ssize_t ret; | ||
1151 | loff_t pos; | ||
1152 | |||
1153 | if (!f.file) | ||
1154 | return -EBADF; | ||
1155 | pos = f.file->f_pos; | ||
1156 | ret = compat_readv(f.file, vec, vlen, &pos); | ||
1157 | f.file->f_pos = pos; | ||
1158 | fdput(f); | ||
1159 | return ret; | ||
1160 | } | ||
1161 | |||
1162 | asmlinkage ssize_t | ||
1163 | compat_sys_preadv64(unsigned long fd, const struct compat_iovec __user *vec, | ||
1164 | unsigned long vlen, loff_t pos) | ||
1165 | { | ||
1166 | struct fd f; | ||
1167 | ssize_t ret; | ||
1168 | |||
1169 | if (pos < 0) | ||
1170 | return -EINVAL; | ||
1171 | f = fdget(fd); | ||
1172 | if (!f.file) | ||
1173 | return -EBADF; | ||
1174 | ret = -ESPIPE; | ||
1175 | if (f.file->f_mode & FMODE_PREAD) | ||
1176 | ret = compat_readv(f.file, vec, vlen, &pos); | ||
1177 | fdput(f); | ||
1178 | return ret; | ||
1179 | } | ||
1180 | |||
1181 | asmlinkage ssize_t | ||
1182 | compat_sys_preadv(unsigned long fd, const struct compat_iovec __user *vec, | ||
1183 | unsigned long vlen, u32 pos_low, u32 pos_high) | ||
1184 | { | ||
1185 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1186 | return compat_sys_preadv64(fd, vec, vlen, pos); | ||
1187 | } | ||
1188 | |||
1189 | static size_t compat_writev(struct file *file, | ||
1190 | const struct compat_iovec __user *vec, | ||
1191 | unsigned long vlen, loff_t *pos) | ||
1192 | { | ||
1193 | ssize_t ret = -EBADF; | ||
1194 | |||
1195 | if (!(file->f_mode & FMODE_WRITE)) | ||
1196 | goto out; | ||
1197 | |||
1198 | ret = -EINVAL; | ||
1199 | if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) | ||
1200 | goto out; | ||
1201 | |||
1202 | ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); | ||
1203 | |||
1204 | out: | ||
1205 | if (ret > 0) | ||
1206 | add_wchar(current, ret); | ||
1207 | inc_syscw(current); | ||
1208 | return ret; | ||
1209 | } | ||
1210 | |||
1211 | asmlinkage ssize_t | ||
1212 | compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec, | ||
1213 | unsigned long vlen) | ||
1214 | { | ||
1215 | struct fd f = fdget(fd); | ||
1216 | ssize_t ret; | ||
1217 | loff_t pos; | ||
1218 | |||
1219 | if (!f.file) | ||
1220 | return -EBADF; | ||
1221 | pos = f.file->f_pos; | ||
1222 | ret = compat_writev(f.file, vec, vlen, &pos); | ||
1223 | f.file->f_pos = pos; | ||
1224 | fdput(f); | ||
1225 | return ret; | ||
1226 | } | ||
1227 | |||
1228 | asmlinkage ssize_t | ||
1229 | compat_sys_pwritev64(unsigned long fd, const struct compat_iovec __user *vec, | ||
1230 | unsigned long vlen, loff_t pos) | ||
1231 | { | ||
1232 | struct fd f; | ||
1233 | ssize_t ret; | ||
1234 | |||
1235 | if (pos < 0) | ||
1236 | return -EINVAL; | ||
1237 | f = fdget(fd); | ||
1238 | if (!f.file) | ||
1239 | return -EBADF; | ||
1240 | ret = -ESPIPE; | ||
1241 | if (f.file->f_mode & FMODE_PWRITE) | ||
1242 | ret = compat_writev(f.file, vec, vlen, &pos); | ||
1243 | fdput(f); | ||
1244 | return ret; | ||
1245 | } | ||
1246 | |||
1247 | asmlinkage ssize_t | ||
1248 | compat_sys_pwritev(unsigned long fd, const struct compat_iovec __user *vec, | ||
1249 | unsigned long vlen, u32 pos_low, u32 pos_high) | ||
1250 | { | ||
1251 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1252 | return compat_sys_pwritev64(fd, vec, vlen, pos); | ||
1253 | } | ||
1254 | |||
1255 | /* | 1071 | /* |
1256 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the | 1072 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the |
1257 | * O_LARGEFILE flag. | 1073 | * O_LARGEFILE flag. |
diff --git a/fs/coredump.c b/fs/coredump.c index ec306cc9a28a..a9abe313e8d5 100644 --- a/fs/coredump.c +++ b/fs/coredump.c | |||
@@ -432,9 +432,7 @@ static bool dump_interrupted(void) | |||
432 | 432 | ||
433 | static void wait_for_dump_helpers(struct file *file) | 433 | static void wait_for_dump_helpers(struct file *file) |
434 | { | 434 | { |
435 | struct pipe_inode_info *pipe; | 435 | struct pipe_inode_info *pipe = file->private_data; |
436 | |||
437 | pipe = file_inode(file)->i_pipe; | ||
438 | 436 | ||
439 | pipe_lock(pipe); | 437 | pipe_lock(pipe); |
440 | pipe->readers++; | 438 | pipe->readers++; |
@@ -656,7 +654,9 @@ void do_coredump(siginfo_t *siginfo) | |||
656 | goto close_fail; | 654 | goto close_fail; |
657 | if (displaced) | 655 | if (displaced) |
658 | put_files_struct(displaced); | 656 | put_files_struct(displaced); |
657 | file_start_write(cprm.file); | ||
659 | core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm); | 658 | core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm); |
659 | file_end_write(cprm.file); | ||
660 | 660 | ||
661 | if (ispipe && core_pipe_limit) | 661 | if (ispipe && core_pipe_limit) |
662 | wait_for_dump_helpers(cprm.file); | 662 | wait_for_dump_helpers(cprm.file); |
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index ede07fc7309f..bfb531564319 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c | |||
@@ -9,6 +9,7 @@ | |||
9 | 9 | ||
10 | #include <linux/efi.h> | 10 | #include <linux/efi.h> |
11 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
12 | #include <linux/slab.h> | ||
12 | 13 | ||
13 | #include "internal.h" | 14 | #include "internal.h" |
14 | 15 | ||
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 640e289d522e..7e787fb90293 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/efi.h> | 10 | #include <linux/efi.h> |
11 | #include <linux/fs.h> | 11 | #include <linux/fs.h> |
12 | #include <linux/ctype.h> | 12 | #include <linux/ctype.h> |
13 | #include <linux/slab.h> | ||
13 | 14 | ||
14 | #include "internal.h" | 15 | #include "internal.h" |
15 | 16 | ||
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 525a2a1ac16c..141aee31884f 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c | |||
@@ -13,6 +13,8 @@ | |||
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
15 | #include <linux/ucs2_string.h> | 15 | #include <linux/ucs2_string.h> |
16 | #include <linux/slab.h> | ||
17 | #include <linux/magic.h> | ||
16 | 18 | ||
17 | #include "internal.h" | 19 | #include "internal.h" |
18 | 20 | ||
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -802,6 +802,15 @@ int kernel_read(struct file *file, loff_t offset, | |||
802 | 802 | ||
803 | EXPORT_SYMBOL(kernel_read); | 803 | EXPORT_SYMBOL(kernel_read); |
804 | 804 | ||
805 | ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len) | ||
806 | { | ||
807 | ssize_t res = file->f_op->read(file, (void __user *)addr, len, &pos); | ||
808 | if (res > 0) | ||
809 | flush_icache_range(addr, addr + len); | ||
810 | return res; | ||
811 | } | ||
812 | EXPORT_SYMBOL(read_code); | ||
813 | |||
805 | static int exec_mmap(struct mm_struct *mm) | 814 | static int exec_mmap(struct mm_struct *mm) |
806 | { | 815 | { |
807 | struct task_struct *tsk; | 816 | struct task_struct *tsk; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index a11ea4d6164c..b1ed9e07434b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2260,7 +2260,7 @@ static const struct seq_operations ext4_mb_seq_groups_ops = { | |||
2260 | 2260 | ||
2261 | static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) | 2261 | static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file) |
2262 | { | 2262 | { |
2263 | struct super_block *sb = PDE(inode)->data; | 2263 | struct super_block *sb = PDE_DATA(inode); |
2264 | int rc; | 2264 | int rc; |
2265 | 2265 | ||
2266 | rc = seq_open(file, &ext4_mb_seq_groups_ops); | 2266 | rc = seq_open(file, &ext4_mb_seq_groups_ops); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dbc7c090c13a..24a146bde742 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -1806,7 +1806,7 @@ static int options_seq_show(struct seq_file *seq, void *offset) | |||
1806 | 1806 | ||
1807 | static int options_open_fs(struct inode *inode, struct file *file) | 1807 | static int options_open_fs(struct inode *inode, struct file *file) |
1808 | { | 1808 | { |
1809 | return single_open(file, options_seq_show, PDE(inode)->data); | 1809 | return single_open(file, options_seq_show, PDE_DATA(inode)); |
1810 | } | 1810 | } |
1811 | 1811 | ||
1812 | static const struct file_operations ext4_seq_options_fops = { | 1812 | static const struct file_operations ext4_seq_options_fops = { |
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 137af4255da6..44abc2f286e0 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c | |||
@@ -299,7 +299,7 @@ int f2fs_acl_chmod(struct inode *inode) | |||
299 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 299 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
300 | struct posix_acl *acl; | 300 | struct posix_acl *acl; |
301 | int error; | 301 | int error; |
302 | mode_t mode = get_inode_mode(inode); | 302 | umode_t mode = get_inode_mode(inode); |
303 | 303 | ||
304 | if (!test_opt(sbi, POSIX_ACL)) | 304 | if (!test_opt(sbi, POSIX_ACL)) |
305 | return 0; | 305 | return 0; |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index a1f38443ecee..1be948768e2f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
@@ -60,7 +60,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
60 | 60 | ||
61 | static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) | 61 | static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) |
62 | { | 62 | { |
63 | mode_t mode = inode->i_mode; | 63 | umode_t mode = inode->i_mode; |
64 | de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; | 64 | de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; |
65 | } | 65 | } |
66 | 66 | ||
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 958a46da19ae..db626282d424 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -590,7 +590,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
590 | { | 590 | { |
591 | unsigned int oldflags; | 591 | unsigned int oldflags; |
592 | 592 | ||
593 | ret = mnt_want_write(filp->f_path.mnt); | 593 | ret = mnt_want_write_file(filp); |
594 | if (ret) | 594 | if (ret) |
595 | return ret; | 595 | return ret; |
596 | 596 | ||
@@ -627,7 +627,7 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
627 | inode->i_ctime = CURRENT_TIME; | 627 | inode->i_ctime = CURRENT_TIME; |
628 | mark_inode_dirty(inode); | 628 | mark_inode_dirty(inode); |
629 | out: | 629 | out: |
630 | mnt_drop_write(filp->f_path.mnt); | 630 | mnt_drop_write_file(filp); |
631 | return ret; | 631 | return ret; |
632 | } | 632 | } |
633 | default: | 633 | default: |
diff --git a/fs/fifo.c b/fs/fifo.c deleted file mode 100644 index cf6f4345ceb0..000000000000 --- a/fs/fifo.c +++ /dev/null | |||
@@ -1,153 +0,0 @@ | |||
1 | /* | ||
2 | * linux/fs/fifo.c | ||
3 | * | ||
4 | * written by Paul H. Hargrove | ||
5 | * | ||
6 | * Fixes: | ||
7 | * 10-06-1999, AV: fixed OOM handling in fifo_open(), moved | ||
8 | * initialization there, switched to external | ||
9 | * allocation of pipe_inode_info. | ||
10 | */ | ||
11 | |||
12 | #include <linux/mm.h> | ||
13 | #include <linux/fs.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/pipe_fs_i.h> | ||
16 | |||
17 | static int wait_for_partner(struct inode* inode, unsigned int *cnt) | ||
18 | { | ||
19 | int cur = *cnt; | ||
20 | |||
21 | while (cur == *cnt) { | ||
22 | pipe_wait(inode->i_pipe); | ||
23 | if (signal_pending(current)) | ||
24 | break; | ||
25 | } | ||
26 | return cur == *cnt ? -ERESTARTSYS : 0; | ||
27 | } | ||
28 | |||
29 | static void wake_up_partner(struct inode* inode) | ||
30 | { | ||
31 | wake_up_interruptible(&inode->i_pipe->wait); | ||
32 | } | ||
33 | |||
34 | static int fifo_open(struct inode *inode, struct file *filp) | ||
35 | { | ||
36 | struct pipe_inode_info *pipe; | ||
37 | int ret; | ||
38 | |||
39 | mutex_lock(&inode->i_mutex); | ||
40 | pipe = inode->i_pipe; | ||
41 | if (!pipe) { | ||
42 | ret = -ENOMEM; | ||
43 | pipe = alloc_pipe_info(inode); | ||
44 | if (!pipe) | ||
45 | goto err_nocleanup; | ||
46 | inode->i_pipe = pipe; | ||
47 | } | ||
48 | filp->f_version = 0; | ||
49 | |||
50 | /* We can only do regular read/write on fifos */ | ||
51 | filp->f_mode &= (FMODE_READ | FMODE_WRITE); | ||
52 | |||
53 | switch (filp->f_mode) { | ||
54 | case FMODE_READ: | ||
55 | /* | ||
56 | * O_RDONLY | ||
57 | * POSIX.1 says that O_NONBLOCK means return with the FIFO | ||
58 | * opened, even when there is no process writing the FIFO. | ||
59 | */ | ||
60 | filp->f_op = &read_pipefifo_fops; | ||
61 | pipe->r_counter++; | ||
62 | if (pipe->readers++ == 0) | ||
63 | wake_up_partner(inode); | ||
64 | |||
65 | if (!pipe->writers) { | ||
66 | if ((filp->f_flags & O_NONBLOCK)) { | ||
67 | /* suppress POLLHUP until we have | ||
68 | * seen a writer */ | ||
69 | filp->f_version = pipe->w_counter; | ||
70 | } else { | ||
71 | if (wait_for_partner(inode, &pipe->w_counter)) | ||
72 | goto err_rd; | ||
73 | } | ||
74 | } | ||
75 | break; | ||
76 | |||
77 | case FMODE_WRITE: | ||
78 | /* | ||
79 | * O_WRONLY | ||
80 | * POSIX.1 says that O_NONBLOCK means return -1 with | ||
81 | * errno=ENXIO when there is no process reading the FIFO. | ||
82 | */ | ||
83 | ret = -ENXIO; | ||
84 | if ((filp->f_flags & O_NONBLOCK) && !pipe->readers) | ||
85 | goto err; | ||
86 | |||
87 | filp->f_op = &write_pipefifo_fops; | ||
88 | pipe->w_counter++; | ||
89 | if (!pipe->writers++) | ||
90 | wake_up_partner(inode); | ||
91 | |||
92 | if (!pipe->readers) { | ||
93 | if (wait_for_partner(inode, &pipe->r_counter)) | ||
94 | goto err_wr; | ||
95 | } | ||
96 | break; | ||
97 | |||
98 | case FMODE_READ | FMODE_WRITE: | ||
99 | /* | ||
100 | * O_RDWR | ||
101 | * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. | ||
102 | * This implementation will NEVER block on a O_RDWR open, since | ||
103 | * the process can at least talk to itself. | ||
104 | */ | ||
105 | filp->f_op = &rdwr_pipefifo_fops; | ||
106 | |||
107 | pipe->readers++; | ||
108 | pipe->writers++; | ||
109 | pipe->r_counter++; | ||
110 | pipe->w_counter++; | ||
111 | if (pipe->readers == 1 || pipe->writers == 1) | ||
112 | wake_up_partner(inode); | ||
113 | break; | ||
114 | |||
115 | default: | ||
116 | ret = -EINVAL; | ||
117 | goto err; | ||
118 | } | ||
119 | |||
120 | /* Ok! */ | ||
121 | mutex_unlock(&inode->i_mutex); | ||
122 | return 0; | ||
123 | |||
124 | err_rd: | ||
125 | if (!--pipe->readers) | ||
126 | wake_up_interruptible(&pipe->wait); | ||
127 | ret = -ERESTARTSYS; | ||
128 | goto err; | ||
129 | |||
130 | err_wr: | ||
131 | if (!--pipe->writers) | ||
132 | wake_up_interruptible(&pipe->wait); | ||
133 | ret = -ERESTARTSYS; | ||
134 | goto err; | ||
135 | |||
136 | err: | ||
137 | if (!pipe->readers && !pipe->writers) | ||
138 | free_pipe_info(inode); | ||
139 | |||
140 | err_nocleanup: | ||
141 | mutex_unlock(&inode->i_mutex); | ||
142 | return ret; | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * Dummy default file-operations: the only thing this does | ||
147 | * is contain the open that then fills in the correct operations | ||
148 | * depending on the access mode of the file... | ||
149 | */ | ||
150 | const struct file_operations def_fifo_fops = { | ||
151 | .open = fifo_open, /* will set read_ or write_pipefifo_fops */ | ||
152 | .llseek = noop_llseek, | ||
153 | }; | ||
diff --git a/fs/file.c b/fs/file.c --- a/fs/file.c +++ b/fs/file.c | |||
@@ -23,24 +23,10 @@ | |||
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include <linux/workqueue.h> | 24 | #include <linux/workqueue.h> |
25 | 25 | ||
26 | struct fdtable_defer { | ||
27 | spinlock_t lock; | ||
28 | struct work_struct wq; | ||
29 | struct fdtable *next; | ||
30 | }; | ||
31 | |||
32 | int sysctl_nr_open __read_mostly = 1024*1024; | 26 | int sysctl_nr_open __read_mostly = 1024*1024; |
33 | int sysctl_nr_open_min = BITS_PER_LONG; | 27 | int sysctl_nr_open_min = BITS_PER_LONG; |
34 | int sysctl_nr_open_max = 1024 * 1024; /* raised later */ | 28 | int sysctl_nr_open_max = 1024 * 1024; /* raised later */ |
35 | 29 | ||
36 | /* | ||
37 | * We use this list to defer free fdtables that have vmalloced | ||
38 | * sets/arrays. By keeping a per-cpu list, we avoid having to embed | ||
39 | * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in | ||
40 | * this per-task structure. | ||
41 | */ | ||
42 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | ||
43 | |||
44 | static void *alloc_fdmem(size_t size) | 30 | static void *alloc_fdmem(size_t size) |
45 | { | 31 | { |
46 | /* | 32 | /* |
@@ -67,46 +53,9 @@ static void __free_fdtable(struct fdtable *fdt) | |||
67 | kfree(fdt); | 53 | kfree(fdt); |
68 | } | 54 | } |
69 | 55 | ||
70 | static void free_fdtable_work(struct work_struct *work) | ||
71 | { | ||
72 | struct fdtable_defer *f = | ||
73 | container_of(work, struct fdtable_defer, wq); | ||
74 | struct fdtable *fdt; | ||
75 | |||
76 | spin_lock_bh(&f->lock); | ||
77 | fdt = f->next; | ||
78 | f->next = NULL; | ||
79 | spin_unlock_bh(&f->lock); | ||
80 | while(fdt) { | ||
81 | struct fdtable *next = fdt->next; | ||
82 | |||
83 | __free_fdtable(fdt); | ||
84 | fdt = next; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | static void free_fdtable_rcu(struct rcu_head *rcu) | 56 | static void free_fdtable_rcu(struct rcu_head *rcu) |
89 | { | 57 | { |
90 | struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); | 58 | __free_fdtable(container_of(rcu, struct fdtable, rcu)); |
91 | struct fdtable_defer *fddef; | ||
92 | |||
93 | BUG_ON(!fdt); | ||
94 | BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT); | ||
95 | |||
96 | if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) { | ||
97 | kfree(fdt->fd); | ||
98 | kfree(fdt->open_fds); | ||
99 | kfree(fdt); | ||
100 | } else { | ||
101 | fddef = &get_cpu_var(fdtable_defer_list); | ||
102 | spin_lock(&fddef->lock); | ||
103 | fdt->next = fddef->next; | ||
104 | fddef->next = fdt; | ||
105 | /* vmallocs are handled from the workqueue context */ | ||
106 | schedule_work(&fddef->wq); | ||
107 | spin_unlock(&fddef->lock); | ||
108 | put_cpu_var(fdtable_defer_list); | ||
109 | } | ||
110 | } | 59 | } |
111 | 60 | ||
112 | /* | 61 | /* |
@@ -174,7 +123,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr) | |||
174 | fdt->open_fds = data; | 123 | fdt->open_fds = data; |
175 | data += nr / BITS_PER_BYTE; | 124 | data += nr / BITS_PER_BYTE; |
176 | fdt->close_on_exec = data; | 125 | fdt->close_on_exec = data; |
177 | fdt->next = NULL; | ||
178 | 126 | ||
179 | return fdt; | 127 | return fdt; |
180 | 128 | ||
@@ -221,7 +169,7 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
221 | /* Continue as planned */ | 169 | /* Continue as planned */ |
222 | copy_fdtable(new_fdt, cur_fdt); | 170 | copy_fdtable(new_fdt, cur_fdt); |
223 | rcu_assign_pointer(files->fdt, new_fdt); | 171 | rcu_assign_pointer(files->fdt, new_fdt); |
224 | if (cur_fdt->max_fds > NR_OPEN_DEFAULT) | 172 | if (cur_fdt != &files->fdtab) |
225 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); | 173 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); |
226 | } else { | 174 | } else { |
227 | /* Somebody else expanded, so undo our attempt */ | 175 | /* Somebody else expanded, so undo our attempt */ |
@@ -316,7 +264,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | |||
316 | new_fdt->close_on_exec = newf->close_on_exec_init; | 264 | new_fdt->close_on_exec = newf->close_on_exec_init; |
317 | new_fdt->open_fds = newf->open_fds_init; | 265 | new_fdt->open_fds = newf->open_fds_init; |
318 | new_fdt->fd = &newf->fd_array[0]; | 266 | new_fdt->fd = &newf->fd_array[0]; |
319 | new_fdt->next = NULL; | ||
320 | 267 | ||
321 | spin_lock(&oldf->file_lock); | 268 | spin_lock(&oldf->file_lock); |
322 | old_fdt = files_fdtable(oldf); | 269 | old_fdt = files_fdtable(oldf); |
@@ -490,19 +437,8 @@ void exit_files(struct task_struct *tsk) | |||
490 | } | 437 | } |
491 | } | 438 | } |
492 | 439 | ||
493 | static void fdtable_defer_list_init(int cpu) | ||
494 | { | ||
495 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); | ||
496 | spin_lock_init(&fddef->lock); | ||
497 | INIT_WORK(&fddef->wq, free_fdtable_work); | ||
498 | fddef->next = NULL; | ||
499 | } | ||
500 | |||
501 | void __init files_defer_init(void) | 440 | void __init files_defer_init(void) |
502 | { | 441 | { |
503 | int i; | ||
504 | for_each_possible_cpu(i) | ||
505 | fdtable_defer_list_init(i); | ||
506 | sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & | 442 | sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & |
507 | -BITS_PER_LONG; | 443 | -BITS_PER_LONG; |
508 | } | 444 | } |
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 11dfa0c3fb46..9bfd1a3214e6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -1319,7 +1319,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, | |||
1319 | page_nr++; | 1319 | page_nr++; |
1320 | ret += buf->len; | 1320 | ret += buf->len; |
1321 | 1321 | ||
1322 | if (pipe->inode) | 1322 | if (pipe->files) |
1323 | do_wakeup = 1; | 1323 | do_wakeup = 1; |
1324 | } | 1324 | } |
1325 | 1325 | ||
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 34b80ba95bad..d15c6f21c17f 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -971,7 +971,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
971 | return err; | 971 | return err; |
972 | 972 | ||
973 | count = ocount; | 973 | count = ocount; |
974 | sb_start_write(inode->i_sb); | ||
975 | mutex_lock(&inode->i_mutex); | 974 | mutex_lock(&inode->i_mutex); |
976 | 975 | ||
977 | /* We can write back this queue in page reclaim */ | 976 | /* We can write back this queue in page reclaim */ |
@@ -1030,7 +1029,6 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1030 | out: | 1029 | out: |
1031 | current->backing_dev_info = NULL; | 1030 | current->backing_dev_info = NULL; |
1032 | mutex_unlock(&inode->i_mutex); | 1031 | mutex_unlock(&inode->i_mutex); |
1033 | sb_end_write(inode->i_sb); | ||
1034 | 1032 | ||
1035 | return written ? written : err; | 1033 | return written ? written : err; |
1036 | } | 1034 | } |
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 9f9dbeceeee7..3027f4dbbab5 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c | |||
@@ -131,6 +131,24 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping, | |||
131 | return ret; | 131 | return ret; |
132 | } | 132 | } |
133 | 133 | ||
134 | static int hpfs_write_end(struct file *file, struct address_space *mapping, | ||
135 | loff_t pos, unsigned len, unsigned copied, | ||
136 | struct page *pagep, void *fsdata) | ||
137 | { | ||
138 | struct inode *inode = mapping->host; | ||
139 | int err; | ||
140 | err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); | ||
141 | if (err < len) | ||
142 | hpfs_write_failed(mapping, pos + len); | ||
143 | if (!(err < 0)) { | ||
144 | /* make sure we write it on close, if not earlier */ | ||
145 | hpfs_lock(inode->i_sb); | ||
146 | hpfs_i(inode)->i_dirty = 1; | ||
147 | hpfs_unlock(inode->i_sb); | ||
148 | } | ||
149 | return err; | ||
150 | } | ||
151 | |||
134 | static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) | 152 | static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) |
135 | { | 153 | { |
136 | return generic_block_bmap(mapping,block,hpfs_get_block); | 154 | return generic_block_bmap(mapping,block,hpfs_get_block); |
@@ -140,30 +158,16 @@ const struct address_space_operations hpfs_aops = { | |||
140 | .readpage = hpfs_readpage, | 158 | .readpage = hpfs_readpage, |
141 | .writepage = hpfs_writepage, | 159 | .writepage = hpfs_writepage, |
142 | .write_begin = hpfs_write_begin, | 160 | .write_begin = hpfs_write_begin, |
143 | .write_end = generic_write_end, | 161 | .write_end = hpfs_write_end, |
144 | .bmap = _hpfs_bmap | 162 | .bmap = _hpfs_bmap |
145 | }; | 163 | }; |
146 | 164 | ||
147 | static ssize_t hpfs_file_write(struct file *file, const char __user *buf, | ||
148 | size_t count, loff_t *ppos) | ||
149 | { | ||
150 | ssize_t retval; | ||
151 | |||
152 | retval = do_sync_write(file, buf, count, ppos); | ||
153 | if (retval > 0) { | ||
154 | hpfs_lock(file->f_path.dentry->d_sb); | ||
155 | hpfs_i(file_inode(file))->i_dirty = 1; | ||
156 | hpfs_unlock(file->f_path.dentry->d_sb); | ||
157 | } | ||
158 | return retval; | ||
159 | } | ||
160 | |||
161 | const struct file_operations hpfs_file_ops = | 165 | const struct file_operations hpfs_file_ops = |
162 | { | 166 | { |
163 | .llseek = generic_file_llseek, | 167 | .llseek = generic_file_llseek, |
164 | .read = do_sync_read, | 168 | .read = do_sync_read, |
165 | .aio_read = generic_file_aio_read, | 169 | .aio_read = generic_file_aio_read, |
166 | .write = hpfs_file_write, | 170 | .write = do_sync_write, |
167 | .aio_write = generic_file_aio_write, | 171 | .aio_write = generic_file_aio_write, |
168 | .mmap = generic_file_mmap, | 172 | .mmap = generic_file_mmap, |
169 | .release = hpfs_file_release, | 173 | .release = hpfs_file_release, |
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 126d3c2e2dee..cd3e38972c86 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
@@ -436,7 +436,6 @@ static int hppfs_open(struct inode *inode, struct file *file) | |||
436 | path.mnt = inode->i_sb->s_fs_info; | 436 | path.mnt = inode->i_sb->s_fs_info; |
437 | path.dentry = HPPFS_I(inode)->proc_dentry; | 437 | path.dentry = HPPFS_I(inode)->proc_dentry; |
438 | 438 | ||
439 | /* XXX This isn't closed anywhere */ | ||
440 | data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); | 439 | data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); |
441 | err = PTR_ERR(data->proc_file); | 440 | err = PTR_ERR(data->proc_file); |
442 | if (IS_ERR(data->proc_file)) | 441 | if (IS_ERR(data->proc_file)) |
@@ -523,12 +522,23 @@ static loff_t hppfs_llseek(struct file *file, loff_t off, int where) | |||
523 | return default_llseek(file, off, where); | 522 | return default_llseek(file, off, where); |
524 | } | 523 | } |
525 | 524 | ||
525 | static int hppfs_release(struct inode *inode, struct file *file) | ||
526 | { | ||
527 | struct hppfs_private *data = file->private_data; | ||
528 | struct file *proc_file = data->proc_file; | ||
529 | if (proc_file) | ||
530 | fput(proc_file); | ||
531 | kfree(data); | ||
532 | return 0; | ||
533 | } | ||
534 | |||
526 | static const struct file_operations hppfs_file_fops = { | 535 | static const struct file_operations hppfs_file_fops = { |
527 | .owner = NULL, | 536 | .owner = NULL, |
528 | .llseek = hppfs_llseek, | 537 | .llseek = hppfs_llseek, |
529 | .read = hppfs_read, | 538 | .read = hppfs_read, |
530 | .write = hppfs_write, | 539 | .write = hppfs_write, |
531 | .open = hppfs_open, | 540 | .open = hppfs_open, |
541 | .release = hppfs_release, | ||
532 | }; | 542 | }; |
533 | 543 | ||
534 | struct hppfs_dirent { | 544 | struct hppfs_dirent { |
@@ -570,18 +580,12 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) | |||
570 | return err; | 580 | return err; |
571 | } | 581 | } |
572 | 582 | ||
573 | static int hppfs_fsync(struct file *file, loff_t start, loff_t end, | ||
574 | int datasync) | ||
575 | { | ||
576 | return filemap_write_and_wait_range(file->f_mapping, start, end); | ||
577 | } | ||
578 | |||
579 | static const struct file_operations hppfs_dir_fops = { | 583 | static const struct file_operations hppfs_dir_fops = { |
580 | .owner = NULL, | 584 | .owner = NULL, |
581 | .readdir = hppfs_readdir, | 585 | .readdir = hppfs_readdir, |
582 | .open = hppfs_dir_open, | 586 | .open = hppfs_dir_open, |
583 | .fsync = hppfs_fsync, | ||
584 | .llseek = default_llseek, | 587 | .llseek = default_llseek, |
588 | .release = hppfs_release, | ||
585 | }; | 589 | }; |
586 | 590 | ||
587 | static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) | 591 | static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) |
diff --git a/fs/inode.c b/fs/inode.c index a898b3d43ccf..00d5fc3b86e1 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -1803,7 +1803,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) | |||
1803 | inode->i_fop = &def_blk_fops; | 1803 | inode->i_fop = &def_blk_fops; |
1804 | inode->i_rdev = rdev; | 1804 | inode->i_rdev = rdev; |
1805 | } else if (S_ISFIFO(mode)) | 1805 | } else if (S_ISFIFO(mode)) |
1806 | inode->i_fop = &def_fifo_fops; | 1806 | inode->i_fop = &pipefifo_fops; |
1807 | else if (S_ISSOCK(mode)) | 1807 | else if (S_ISSOCK(mode)) |
1808 | inode->i_fop = &bad_sock_fops; | 1808 | inode->i_fop = &bad_sock_fops; |
1809 | else | 1809 | else |
diff --git a/fs/internal.h b/fs/internal.h index 4be78237d896..eaa75f75b625 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -130,3 +130,8 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); | |||
130 | * read_write.c | 130 | * read_write.c |
131 | */ | 131 | */ |
132 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); | 132 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); |
133 | |||
134 | /* | ||
135 | * pipe.c | ||
136 | */ | ||
137 | extern const struct file_operations pipefifo_fops; | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index f6c5ba027f4f..95457576e434 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -979,7 +979,7 @@ static const struct seq_operations jbd2_seq_info_ops = { | |||
979 | 979 | ||
980 | static int jbd2_seq_info_open(struct inode *inode, struct file *file) | 980 | static int jbd2_seq_info_open(struct inode *inode, struct file *file) |
981 | { | 981 | { |
982 | journal_t *journal = PDE(inode)->data; | 982 | journal_t *journal = PDE_DATA(inode); |
983 | struct jbd2_stats_proc_session *s; | 983 | struct jbd2_stats_proc_session *s; |
984 | int rc, size; | 984 | int rc, size; |
985 | 985 | ||
diff --git a/fs/mount.h b/fs/mount.h index cd5007980400..64a858143ff9 100644 --- a/fs/mount.h +++ b/fs/mount.h | |||
@@ -18,6 +18,12 @@ struct mnt_pcp { | |||
18 | int mnt_writers; | 18 | int mnt_writers; |
19 | }; | 19 | }; |
20 | 20 | ||
21 | struct mountpoint { | ||
22 | struct list_head m_hash; | ||
23 | struct dentry *m_dentry; | ||
24 | int m_count; | ||
25 | }; | ||
26 | |||
21 | struct mount { | 27 | struct mount { |
22 | struct list_head mnt_hash; | 28 | struct list_head mnt_hash; |
23 | struct mount *mnt_parent; | 29 | struct mount *mnt_parent; |
@@ -40,6 +46,7 @@ struct mount { | |||
40 | struct list_head mnt_slave; /* slave list entry */ | 46 | struct list_head mnt_slave; /* slave list entry */ |
41 | struct mount *mnt_master; /* slave is on master->mnt_slave_list */ | 47 | struct mount *mnt_master; /* slave is on master->mnt_slave_list */ |
42 | struct mnt_namespace *mnt_ns; /* containing namespace */ | 48 | struct mnt_namespace *mnt_ns; /* containing namespace */ |
49 | struct mountpoint *mnt_mp; /* where is it mounted */ | ||
43 | #ifdef CONFIG_FSNOTIFY | 50 | #ifdef CONFIG_FSNOTIFY |
44 | struct hlist_head mnt_fsnotify_marks; | 51 | struct hlist_head mnt_fsnotify_marks; |
45 | __u32 mnt_fsnotify_mask; | 52 | __u32 mnt_fsnotify_mask; |
diff --git a/fs/namespace.c b/fs/namespace.c index 341d3f564082..b4f96a5230a3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -21,7 +21,8 @@ | |||
21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ | 21 | #include <linux/fs_struct.h> /* get_fs_root et.al. */ |
22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ | 22 | #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ |
23 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/proc_fs.h> | 24 | #include <linux/proc_ns.h> |
25 | #include <linux/magic.h> | ||
25 | #include "pnode.h" | 26 | #include "pnode.h" |
26 | #include "internal.h" | 27 | #include "internal.h" |
27 | 28 | ||
@@ -36,6 +37,7 @@ static int mnt_id_start = 0; | |||
36 | static int mnt_group_start = 1; | 37 | static int mnt_group_start = 1; |
37 | 38 | ||
38 | static struct list_head *mount_hashtable __read_mostly; | 39 | static struct list_head *mount_hashtable __read_mostly; |
40 | static struct list_head *mountpoint_hashtable __read_mostly; | ||
39 | static struct kmem_cache *mnt_cache __read_mostly; | 41 | static struct kmem_cache *mnt_cache __read_mostly; |
40 | static struct rw_semaphore namespace_sem; | 42 | static struct rw_semaphore namespace_sem; |
41 | 43 | ||
@@ -605,6 +607,51 @@ struct vfsmount *lookup_mnt(struct path *path) | |||
605 | } | 607 | } |
606 | } | 608 | } |
607 | 609 | ||
610 | static struct mountpoint *new_mountpoint(struct dentry *dentry) | ||
611 | { | ||
612 | struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry); | ||
613 | struct mountpoint *mp; | ||
614 | |||
615 | list_for_each_entry(mp, chain, m_hash) { | ||
616 | if (mp->m_dentry == dentry) { | ||
617 | /* might be worth a WARN_ON() */ | ||
618 | if (d_unlinked(dentry)) | ||
619 | return ERR_PTR(-ENOENT); | ||
620 | mp->m_count++; | ||
621 | return mp; | ||
622 | } | ||
623 | } | ||
624 | |||
625 | mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); | ||
626 | if (!mp) | ||
627 | return ERR_PTR(-ENOMEM); | ||
628 | |||
629 | spin_lock(&dentry->d_lock); | ||
630 | if (d_unlinked(dentry)) { | ||
631 | spin_unlock(&dentry->d_lock); | ||
632 | kfree(mp); | ||
633 | return ERR_PTR(-ENOENT); | ||
634 | } | ||
635 | dentry->d_flags |= DCACHE_MOUNTED; | ||
636 | spin_unlock(&dentry->d_lock); | ||
637 | mp->m_dentry = dentry; | ||
638 | mp->m_count = 1; | ||
639 | list_add(&mp->m_hash, chain); | ||
640 | return mp; | ||
641 | } | ||
642 | |||
643 | static void put_mountpoint(struct mountpoint *mp) | ||
644 | { | ||
645 | if (!--mp->m_count) { | ||
646 | struct dentry *dentry = mp->m_dentry; | ||
647 | spin_lock(&dentry->d_lock); | ||
648 | dentry->d_flags &= ~DCACHE_MOUNTED; | ||
649 | spin_unlock(&dentry->d_lock); | ||
650 | list_del(&mp->m_hash); | ||
651 | kfree(mp); | ||
652 | } | ||
653 | } | ||
654 | |||
608 | static inline int check_mnt(struct mount *mnt) | 655 | static inline int check_mnt(struct mount *mnt) |
609 | { | 656 | { |
610 | return mnt->mnt_ns == current->nsproxy->mnt_ns; | 657 | return mnt->mnt_ns == current->nsproxy->mnt_ns; |
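Note (editor): the two helpers above replace the old per-umount hash-table scan (dentry_reset_mounted, removed below) with plain reference counting: one struct mountpoint per mounted-on dentry, one m_count reference per user. A minimal sketch of how they are meant to pair up, not part of the patch; locking (namespace_sem, vfsmount_lock) is elided and the function name is made up for illustration.

	static int example_attach_at(struct dentry *dentry, struct mount *parent, struct mount *child)
	{
		/* existing entry: m_count++; otherwise allocate one with m_count = 1 */
		struct mountpoint *mp = new_mountpoint(dentry);

		if (IS_ERR(mp))
			return PTR_ERR(mp);	/* -ENOENT if the dentry is unlinked, -ENOMEM on allocation failure */
		mnt_set_mountpoint(parent, mp, child);	/* child->mnt_mp = mp, mp->m_count++ */
		put_mountpoint(mp);			/* drop the lookup reference; the child keeps its own */
		return 0;
	}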
@@ -633,27 +680,6 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) | |||
633 | } | 680 | } |
634 | 681 | ||
635 | /* | 682 | /* |
636 | * Clear dentry's mounted state if it has no remaining mounts. | ||
637 | * vfsmount_lock must be held for write. | ||
638 | */ | ||
639 | static void dentry_reset_mounted(struct dentry *dentry) | ||
640 | { | ||
641 | unsigned u; | ||
642 | |||
643 | for (u = 0; u < HASH_SIZE; u++) { | ||
644 | struct mount *p; | ||
645 | |||
646 | list_for_each_entry(p, &mount_hashtable[u], mnt_hash) { | ||
647 | if (p->mnt_mountpoint == dentry) | ||
648 | return; | ||
649 | } | ||
650 | } | ||
651 | spin_lock(&dentry->d_lock); | ||
652 | dentry->d_flags &= ~DCACHE_MOUNTED; | ||
653 | spin_unlock(&dentry->d_lock); | ||
654 | } | ||
655 | |||
656 | /* | ||
657 | * vfsmount lock must be held for write | 683 | * vfsmount lock must be held for write |
658 | */ | 684 | */ |
659 | static void detach_mnt(struct mount *mnt, struct path *old_path) | 685 | static void detach_mnt(struct mount *mnt, struct path *old_path) |
@@ -664,32 +690,35 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) | |||
664 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; | 690 | mnt->mnt_mountpoint = mnt->mnt.mnt_root; |
665 | list_del_init(&mnt->mnt_child); | 691 | list_del_init(&mnt->mnt_child); |
666 | list_del_init(&mnt->mnt_hash); | 692 | list_del_init(&mnt->mnt_hash); |
667 | dentry_reset_mounted(old_path->dentry); | 693 | put_mountpoint(mnt->mnt_mp); |
694 | mnt->mnt_mp = NULL; | ||
668 | } | 695 | } |
669 | 696 | ||
670 | /* | 697 | /* |
671 | * vfsmount lock must be held for write | 698 | * vfsmount lock must be held for write |
672 | */ | 699 | */ |
673 | void mnt_set_mountpoint(struct mount *mnt, struct dentry *dentry, | 700 | void mnt_set_mountpoint(struct mount *mnt, |
701 | struct mountpoint *mp, | ||
674 | struct mount *child_mnt) | 702 | struct mount *child_mnt) |
675 | { | 703 | { |
704 | mp->m_count++; | ||
676 | mnt_add_count(mnt, 1); /* essentially, that's mntget */ | 705 | mnt_add_count(mnt, 1); /* essentially, that's mntget */ |
677 | child_mnt->mnt_mountpoint = dget(dentry); | 706 | child_mnt->mnt_mountpoint = dget(mp->m_dentry); |
678 | child_mnt->mnt_parent = mnt; | 707 | child_mnt->mnt_parent = mnt; |
679 | spin_lock(&dentry->d_lock); | 708 | child_mnt->mnt_mp = mp; |
680 | dentry->d_flags |= DCACHE_MOUNTED; | ||
681 | spin_unlock(&dentry->d_lock); | ||
682 | } | 709 | } |
683 | 710 | ||
684 | /* | 711 | /* |
685 | * vfsmount lock must be held for write | 712 | * vfsmount lock must be held for write |
686 | */ | 713 | */ |
687 | static void attach_mnt(struct mount *mnt, struct path *path) | 714 | static void attach_mnt(struct mount *mnt, |
715 | struct mount *parent, | ||
716 | struct mountpoint *mp) | ||
688 | { | 717 | { |
689 | mnt_set_mountpoint(real_mount(path->mnt), path->dentry, mnt); | 718 | mnt_set_mountpoint(parent, mp, mnt); |
690 | list_add_tail(&mnt->mnt_hash, mount_hashtable + | 719 | list_add_tail(&mnt->mnt_hash, mount_hashtable + |
691 | hash(path->mnt, path->dentry)); | 720 | hash(&parent->mnt, mp->m_dentry)); |
692 | list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts); | 721 | list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); |
693 | } | 722 | } |
694 | 723 | ||
695 | /* | 724 | /* |
@@ -1095,11 +1124,23 @@ int may_umount(struct vfsmount *mnt) | |||
1095 | 1124 | ||
1096 | EXPORT_SYMBOL(may_umount); | 1125 | EXPORT_SYMBOL(may_umount); |
1097 | 1126 | ||
1098 | void release_mounts(struct list_head *head) | 1127 | static LIST_HEAD(unmounted); /* protected by namespace_sem */ |
1128 | |||
1129 | static void namespace_unlock(void) | ||
1099 | { | 1130 | { |
1100 | struct mount *mnt; | 1131 | struct mount *mnt; |
1101 | while (!list_empty(head)) { | 1132 | LIST_HEAD(head); |
1102 | mnt = list_first_entry(head, struct mount, mnt_hash); | 1133 | |
1134 | if (likely(list_empty(&unmounted))) { | ||
1135 | up_write(&namespace_sem); | ||
1136 | return; | ||
1137 | } | ||
1138 | |||
1139 | list_splice_init(&unmounted, &head); | ||
1140 | up_write(&namespace_sem); | ||
1141 | |||
1142 | while (!list_empty(&head)) { | ||
1143 | mnt = list_first_entry(&head, struct mount, mnt_hash); | ||
1103 | list_del_init(&mnt->mnt_hash); | 1144 | list_del_init(&mnt->mnt_hash); |
1104 | if (mnt_has_parent(mnt)) { | 1145 | if (mnt_has_parent(mnt)) { |
1105 | struct dentry *dentry; | 1146 | struct dentry *dentry; |
@@ -1119,11 +1160,16 @@ void release_mounts(struct list_head *head) | |||
1119 | } | 1160 | } |
1120 | } | 1161 | } |
1121 | 1162 | ||
1163 | static inline void namespace_lock(void) | ||
1164 | { | ||
1165 | down_write(&namespace_sem); | ||
1166 | } | ||
1167 | |||
1122 | /* | 1168 | /* |
1123 | * vfsmount lock must be held for write | 1169 | * vfsmount lock must be held for write |
1124 | * namespace_sem must be held for write | 1170 | * namespace_sem must be held for write |
1125 | */ | 1171 | */ |
1126 | void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) | 1172 | void umount_tree(struct mount *mnt, int propagate) |
1127 | { | 1173 | { |
1128 | LIST_HEAD(tmp_list); | 1174 | LIST_HEAD(tmp_list); |
1129 | struct mount *p; | 1175 | struct mount *p; |
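Note (editor): release_mounts() disappears as a separate caller-visible step. umount_tree() now splices its victims onto the file-local 'unmounted' list (protected by namespace_sem), and namespace_unlock() both drops the semaphore and disposes of that list. The callers converted in the hunks below all reduce to the same shape; an illustrative sketch, not part of the patch, with a made-up function name:

	static void example_kill_tree(struct mount *mnt)
	{
		namespace_lock();			/* down_write(&namespace_sem) */
		br_write_lock(&vfsmount_lock);
		umount_tree(mnt, 0);			/* victims are spliced onto the global 'unmounted' list */
		br_write_unlock(&vfsmount_lock);
		namespace_unlock();			/* up_write(), then dput()/mntput() of everything collected */
	}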
@@ -1142,20 +1188,20 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) | |||
1142 | list_del_init(&p->mnt_child); | 1188 | list_del_init(&p->mnt_child); |
1143 | if (mnt_has_parent(p)) { | 1189 | if (mnt_has_parent(p)) { |
1144 | p->mnt_parent->mnt_ghosts++; | 1190 | p->mnt_parent->mnt_ghosts++; |
1145 | dentry_reset_mounted(p->mnt_mountpoint); | 1191 | put_mountpoint(p->mnt_mp); |
1192 | p->mnt_mp = NULL; | ||
1146 | } | 1193 | } |
1147 | change_mnt_propagation(p, MS_PRIVATE); | 1194 | change_mnt_propagation(p, MS_PRIVATE); |
1148 | } | 1195 | } |
1149 | list_splice(&tmp_list, kill); | 1196 | list_splice(&tmp_list, &unmounted); |
1150 | } | 1197 | } |
1151 | 1198 | ||
1152 | static void shrink_submounts(struct mount *mnt, struct list_head *umounts); | 1199 | static void shrink_submounts(struct mount *mnt); |
1153 | 1200 | ||
1154 | static int do_umount(struct mount *mnt, int flags) | 1201 | static int do_umount(struct mount *mnt, int flags) |
1155 | { | 1202 | { |
1156 | struct super_block *sb = mnt->mnt.mnt_sb; | 1203 | struct super_block *sb = mnt->mnt.mnt_sb; |
1157 | int retval; | 1204 | int retval; |
1158 | LIST_HEAD(umount_list); | ||
1159 | 1205 | ||
1160 | retval = security_sb_umount(&mnt->mnt, flags); | 1206 | retval = security_sb_umount(&mnt->mnt, flags); |
1161 | if (retval) | 1207 | if (retval) |
@@ -1222,22 +1268,21 @@ static int do_umount(struct mount *mnt, int flags) | |||
1222 | return retval; | 1268 | return retval; |
1223 | } | 1269 | } |
1224 | 1270 | ||
1225 | down_write(&namespace_sem); | 1271 | namespace_lock(); |
1226 | br_write_lock(&vfsmount_lock); | 1272 | br_write_lock(&vfsmount_lock); |
1227 | event++; | 1273 | event++; |
1228 | 1274 | ||
1229 | if (!(flags & MNT_DETACH)) | 1275 | if (!(flags & MNT_DETACH)) |
1230 | shrink_submounts(mnt, &umount_list); | 1276 | shrink_submounts(mnt); |
1231 | 1277 | ||
1232 | retval = -EBUSY; | 1278 | retval = -EBUSY; |
1233 | if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { | 1279 | if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) { |
1234 | if (!list_empty(&mnt->mnt_list)) | 1280 | if (!list_empty(&mnt->mnt_list)) |
1235 | umount_tree(mnt, 1, &umount_list); | 1281 | umount_tree(mnt, 1); |
1236 | retval = 0; | 1282 | retval = 0; |
1237 | } | 1283 | } |
1238 | br_write_unlock(&vfsmount_lock); | 1284 | br_write_unlock(&vfsmount_lock); |
1239 | up_write(&namespace_sem); | 1285 | namespace_unlock(); |
1240 | release_mounts(&umount_list); | ||
1241 | return retval; | 1286 | return retval; |
1242 | } | 1287 | } |
1243 | 1288 | ||
@@ -1310,13 +1355,13 @@ static bool mnt_ns_loop(struct path *path) | |||
1310 | * mount namespace loop? | 1355 | * mount namespace loop? |
1311 | */ | 1356 | */ |
1312 | struct inode *inode = path->dentry->d_inode; | 1357 | struct inode *inode = path->dentry->d_inode; |
1313 | struct proc_inode *ei; | 1358 | struct proc_ns *ei; |
1314 | struct mnt_namespace *mnt_ns; | 1359 | struct mnt_namespace *mnt_ns; |
1315 | 1360 | ||
1316 | if (!proc_ns_inode(inode)) | 1361 | if (!proc_ns_inode(inode)) |
1317 | return false; | 1362 | return false; |
1318 | 1363 | ||
1319 | ei = PROC_I(inode); | 1364 | ei = get_proc_ns(inode); |
1320 | if (ei->ns_ops != &mntns_operations) | 1365 | if (ei->ns_ops != &mntns_operations) |
1321 | return false; | 1366 | return false; |
1322 | 1367 | ||
@@ -1327,8 +1372,7 @@ static bool mnt_ns_loop(struct path *path) | |||
1327 | struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, | 1372 | struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, |
1328 | int flag) | 1373 | int flag) |
1329 | { | 1374 | { |
1330 | struct mount *res, *p, *q, *r; | 1375 | struct mount *res, *p, *q, *r, *parent; |
1331 | struct path path; | ||
1332 | 1376 | ||
1333 | if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) | 1377 | if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) |
1334 | return ERR_PTR(-EINVAL); | 1378 | return ERR_PTR(-EINVAL); |
@@ -1355,25 +1399,22 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, | |||
1355 | q = q->mnt_parent; | 1399 | q = q->mnt_parent; |
1356 | } | 1400 | } |
1357 | p = s; | 1401 | p = s; |
1358 | path.mnt = &q->mnt; | 1402 | parent = q; |
1359 | path.dentry = p->mnt_mountpoint; | ||
1360 | q = clone_mnt(p, p->mnt.mnt_root, flag); | 1403 | q = clone_mnt(p, p->mnt.mnt_root, flag); |
1361 | if (IS_ERR(q)) | 1404 | if (IS_ERR(q)) |
1362 | goto out; | 1405 | goto out; |
1363 | br_write_lock(&vfsmount_lock); | 1406 | br_write_lock(&vfsmount_lock); |
1364 | list_add_tail(&q->mnt_list, &res->mnt_list); | 1407 | list_add_tail(&q->mnt_list, &res->mnt_list); |
1365 | attach_mnt(q, &path); | 1408 | attach_mnt(q, parent, p->mnt_mp); |
1366 | br_write_unlock(&vfsmount_lock); | 1409 | br_write_unlock(&vfsmount_lock); |
1367 | } | 1410 | } |
1368 | } | 1411 | } |
1369 | return res; | 1412 | return res; |
1370 | out: | 1413 | out: |
1371 | if (res) { | 1414 | if (res) { |
1372 | LIST_HEAD(umount_list); | ||
1373 | br_write_lock(&vfsmount_lock); | 1415 | br_write_lock(&vfsmount_lock); |
1374 | umount_tree(res, 0, &umount_list); | 1416 | umount_tree(res, 0); |
1375 | br_write_unlock(&vfsmount_lock); | 1417 | br_write_unlock(&vfsmount_lock); |
1376 | release_mounts(&umount_list); | ||
1377 | } | 1418 | } |
1378 | return q; | 1419 | return q; |
1379 | } | 1420 | } |
@@ -1383,10 +1424,10 @@ out: | |||
1383 | struct vfsmount *collect_mounts(struct path *path) | 1424 | struct vfsmount *collect_mounts(struct path *path) |
1384 | { | 1425 | { |
1385 | struct mount *tree; | 1426 | struct mount *tree; |
1386 | down_write(&namespace_sem); | 1427 | namespace_lock(); |
1387 | tree = copy_tree(real_mount(path->mnt), path->dentry, | 1428 | tree = copy_tree(real_mount(path->mnt), path->dentry, |
1388 | CL_COPY_ALL | CL_PRIVATE); | 1429 | CL_COPY_ALL | CL_PRIVATE); |
1389 | up_write(&namespace_sem); | 1430 | namespace_unlock(); |
1390 | if (IS_ERR(tree)) | 1431 | if (IS_ERR(tree)) |
1391 | return NULL; | 1432 | return NULL; |
1392 | return &tree->mnt; | 1433 | return &tree->mnt; |
@@ -1394,13 +1435,11 @@ struct vfsmount *collect_mounts(struct path *path) | |||
1394 | 1435 | ||
1395 | void drop_collected_mounts(struct vfsmount *mnt) | 1436 | void drop_collected_mounts(struct vfsmount *mnt) |
1396 | { | 1437 | { |
1397 | LIST_HEAD(umount_list); | 1438 | namespace_lock(); |
1398 | down_write(&namespace_sem); | ||
1399 | br_write_lock(&vfsmount_lock); | 1439 | br_write_lock(&vfsmount_lock); |
1400 | umount_tree(real_mount(mnt), 0, &umount_list); | 1440 | umount_tree(real_mount(mnt), 0); |
1401 | br_write_unlock(&vfsmount_lock); | 1441 | br_write_unlock(&vfsmount_lock); |
1402 | up_write(&namespace_sem); | 1442 | namespace_unlock(); |
1403 | release_mounts(&umount_list); | ||
1404 | } | 1443 | } |
1405 | 1444 | ||
1406 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, | 1445 | int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, |
@@ -1509,11 +1548,11 @@ static int invent_group_ids(struct mount *mnt, bool recurse) | |||
1509 | * in allocations. | 1548 | * in allocations. |
1510 | */ | 1549 | */ |
1511 | static int attach_recursive_mnt(struct mount *source_mnt, | 1550 | static int attach_recursive_mnt(struct mount *source_mnt, |
1512 | struct path *path, struct path *parent_path) | 1551 | struct mount *dest_mnt, |
1552 | struct mountpoint *dest_mp, | ||
1553 | struct path *parent_path) | ||
1513 | { | 1554 | { |
1514 | LIST_HEAD(tree_list); | 1555 | LIST_HEAD(tree_list); |
1515 | struct mount *dest_mnt = real_mount(path->mnt); | ||
1516 | struct dentry *dest_dentry = path->dentry; | ||
1517 | struct mount *child, *p; | 1556 | struct mount *child, *p; |
1518 | int err; | 1557 | int err; |
1519 | 1558 | ||
@@ -1522,7 +1561,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1522 | if (err) | 1561 | if (err) |
1523 | goto out; | 1562 | goto out; |
1524 | } | 1563 | } |
1525 | err = propagate_mnt(dest_mnt, dest_dentry, source_mnt, &tree_list); | 1564 | err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); |
1526 | if (err) | 1565 | if (err) |
1527 | goto out_cleanup_ids; | 1566 | goto out_cleanup_ids; |
1528 | 1567 | ||
@@ -1534,10 +1573,10 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1534 | } | 1573 | } |
1535 | if (parent_path) { | 1574 | if (parent_path) { |
1536 | detach_mnt(source_mnt, parent_path); | 1575 | detach_mnt(source_mnt, parent_path); |
1537 | attach_mnt(source_mnt, path); | 1576 | attach_mnt(source_mnt, dest_mnt, dest_mp); |
1538 | touch_mnt_namespace(source_mnt->mnt_ns); | 1577 | touch_mnt_namespace(source_mnt->mnt_ns); |
1539 | } else { | 1578 | } else { |
1540 | mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt); | 1579 | mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); |
1541 | commit_tree(source_mnt); | 1580 | commit_tree(source_mnt); |
1542 | } | 1581 | } |
1543 | 1582 | ||
@@ -1556,46 +1595,53 @@ static int attach_recursive_mnt(struct mount *source_mnt, | |||
1556 | return err; | 1595 | return err; |
1557 | } | 1596 | } |
1558 | 1597 | ||
1559 | static int lock_mount(struct path *path) | 1598 | static struct mountpoint *lock_mount(struct path *path) |
1560 | { | 1599 | { |
1561 | struct vfsmount *mnt; | 1600 | struct vfsmount *mnt; |
1601 | struct dentry *dentry = path->dentry; | ||
1562 | retry: | 1602 | retry: |
1563 | mutex_lock(&path->dentry->d_inode->i_mutex); | 1603 | mutex_lock(&dentry->d_inode->i_mutex); |
1564 | if (unlikely(cant_mount(path->dentry))) { | 1604 | if (unlikely(cant_mount(dentry))) { |
1565 | mutex_unlock(&path->dentry->d_inode->i_mutex); | 1605 | mutex_unlock(&dentry->d_inode->i_mutex); |
1566 | return -ENOENT; | 1606 | return ERR_PTR(-ENOENT); |
1567 | } | 1607 | } |
1568 | down_write(&namespace_sem); | 1608 | namespace_lock(); |
1569 | mnt = lookup_mnt(path); | 1609 | mnt = lookup_mnt(path); |
1570 | if (likely(!mnt)) | 1610 | if (likely(!mnt)) { |
1571 | return 0; | 1611 | struct mountpoint *mp = new_mountpoint(dentry); |
1572 | up_write(&namespace_sem); | 1612 | if (IS_ERR(mp)) { |
1613 | namespace_unlock(); | ||
1614 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
1615 | return mp; | ||
1616 | } | ||
1617 | return mp; | ||
1618 | } | ||
1619 | namespace_unlock(); | ||
1573 | mutex_unlock(&path->dentry->d_inode->i_mutex); | 1620 | mutex_unlock(&path->dentry->d_inode->i_mutex); |
1574 | path_put(path); | 1621 | path_put(path); |
1575 | path->mnt = mnt; | 1622 | path->mnt = mnt; |
1576 | path->dentry = dget(mnt->mnt_root); | 1623 | dentry = path->dentry = dget(mnt->mnt_root); |
1577 | goto retry; | 1624 | goto retry; |
1578 | } | 1625 | } |
1579 | 1626 | ||
1580 | static void unlock_mount(struct path *path) | 1627 | static void unlock_mount(struct mountpoint *where) |
1581 | { | 1628 | { |
1582 | up_write(&namespace_sem); | 1629 | struct dentry *dentry = where->m_dentry; |
1583 | mutex_unlock(&path->dentry->d_inode->i_mutex); | 1630 | put_mountpoint(where); |
1631 | namespace_unlock(); | ||
1632 | mutex_unlock(&dentry->d_inode->i_mutex); | ||
1584 | } | 1633 | } |
1585 | 1634 | ||
1586 | static int graft_tree(struct mount *mnt, struct path *path) | 1635 | static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) |
1587 | { | 1636 | { |
1588 | if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) | 1637 | if (mnt->mnt.mnt_sb->s_flags & MS_NOUSER) |
1589 | return -EINVAL; | 1638 | return -EINVAL; |
1590 | 1639 | ||
1591 | if (S_ISDIR(path->dentry->d_inode->i_mode) != | 1640 | if (S_ISDIR(mp->m_dentry->d_inode->i_mode) != |
1592 | S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode)) | 1641 | S_ISDIR(mnt->mnt.mnt_root->d_inode->i_mode)) |
1593 | return -ENOTDIR; | 1642 | return -ENOTDIR; |
1594 | 1643 | ||
1595 | if (d_unlinked(path->dentry)) | 1644 | return attach_recursive_mnt(mnt, p, mp, NULL); |
1596 | return -ENOENT; | ||
1597 | |||
1598 | return attach_recursive_mnt(mnt, path, NULL); | ||
1599 | } | 1645 | } |
1600 | 1646 | ||
1601 | /* | 1647 | /* |
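Note (editor): lock_mount() now returns the pinned struct mountpoint (or an ERR_PTR) instead of 0/-errno, and graft_tree() takes the parent mount plus that mountpoint rather than a struct path. The calling convention, as do_loopback() and do_add_mount() use it below; editor's sketch only, with a made-up function name:

	static int example_mount_here(struct mount *mnt, struct path *path)
	{
		struct mountpoint *mp = lock_mount(path);	/* i_mutex + namespace_sem held, mountpoint pinned */
		int err;

		if (IS_ERR(mp))
			return PTR_ERR(mp);
		err = graft_tree(mnt, real_mount(path->mnt), mp);
		unlock_mount(mp);				/* put_mountpoint(), namespace_unlock(), i_mutex released */
		return err;
	}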
@@ -1633,7 +1679,7 @@ static int do_change_type(struct path *path, int flag) | |||
1633 | if (!type) | 1679 | if (!type) |
1634 | return -EINVAL; | 1680 | return -EINVAL; |
1635 | 1681 | ||
1636 | down_write(&namespace_sem); | 1682 | namespace_lock(); |
1637 | if (type == MS_SHARED) { | 1683 | if (type == MS_SHARED) { |
1638 | err = invent_group_ids(mnt, recurse); | 1684 | err = invent_group_ids(mnt, recurse); |
1639 | if (err) | 1685 | if (err) |
@@ -1646,7 +1692,7 @@ static int do_change_type(struct path *path, int flag) | |||
1646 | br_write_unlock(&vfsmount_lock); | 1692 | br_write_unlock(&vfsmount_lock); |
1647 | 1693 | ||
1648 | out_unlock: | 1694 | out_unlock: |
1649 | up_write(&namespace_sem); | 1695 | namespace_unlock(); |
1650 | return err; | 1696 | return err; |
1651 | } | 1697 | } |
1652 | 1698 | ||
@@ -1656,9 +1702,9 @@ static int do_change_type(struct path *path, int flag) | |||
1656 | static int do_loopback(struct path *path, const char *old_name, | 1702 | static int do_loopback(struct path *path, const char *old_name, |
1657 | int recurse) | 1703 | int recurse) |
1658 | { | 1704 | { |
1659 | LIST_HEAD(umount_list); | ||
1660 | struct path old_path; | 1705 | struct path old_path; |
1661 | struct mount *mnt = NULL, *old; | 1706 | struct mount *mnt = NULL, *old, *parent; |
1707 | struct mountpoint *mp; | ||
1662 | int err; | 1708 | int err; |
1663 | if (!old_name || !*old_name) | 1709 | if (!old_name || !*old_name) |
1664 | return -EINVAL; | 1710 | return -EINVAL; |
@@ -1670,17 +1716,19 @@ static int do_loopback(struct path *path, const char *old_name, | |||
1670 | if (mnt_ns_loop(&old_path)) | 1716 | if (mnt_ns_loop(&old_path)) |
1671 | goto out; | 1717 | goto out; |
1672 | 1718 | ||
1673 | err = lock_mount(path); | 1719 | mp = lock_mount(path); |
1674 | if (err) | 1720 | err = PTR_ERR(mp); |
1721 | if (IS_ERR(mp)) | ||
1675 | goto out; | 1722 | goto out; |
1676 | 1723 | ||
1677 | old = real_mount(old_path.mnt); | 1724 | old = real_mount(old_path.mnt); |
1725 | parent = real_mount(path->mnt); | ||
1678 | 1726 | ||
1679 | err = -EINVAL; | 1727 | err = -EINVAL; |
1680 | if (IS_MNT_UNBINDABLE(old)) | 1728 | if (IS_MNT_UNBINDABLE(old)) |
1681 | goto out2; | 1729 | goto out2; |
1682 | 1730 | ||
1683 | if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old)) | 1731 | if (!check_mnt(parent) || !check_mnt(old)) |
1684 | goto out2; | 1732 | goto out2; |
1685 | 1733 | ||
1686 | if (recurse) | 1734 | if (recurse) |
@@ -1693,15 +1741,14 @@ static int do_loopback(struct path *path, const char *old_name, | |||
1693 | goto out2; | 1741 | goto out2; |
1694 | } | 1742 | } |
1695 | 1743 | ||
1696 | err = graft_tree(mnt, path); | 1744 | err = graft_tree(mnt, parent, mp); |
1697 | if (err) { | 1745 | if (err) { |
1698 | br_write_lock(&vfsmount_lock); | 1746 | br_write_lock(&vfsmount_lock); |
1699 | umount_tree(mnt, 0, &umount_list); | 1747 | umount_tree(mnt, 0); |
1700 | br_write_unlock(&vfsmount_lock); | 1748 | br_write_unlock(&vfsmount_lock); |
1701 | } | 1749 | } |
1702 | out2: | 1750 | out2: |
1703 | unlock_mount(path); | 1751 | unlock_mount(mp); |
1704 | release_mounts(&umount_list); | ||
1705 | out: | 1752 | out: |
1706 | path_put(&old_path); | 1753 | path_put(&old_path); |
1707 | return err; | 1754 | return err; |
@@ -1786,6 +1833,7 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1786 | struct path old_path, parent_path; | 1833 | struct path old_path, parent_path; |
1787 | struct mount *p; | 1834 | struct mount *p; |
1788 | struct mount *old; | 1835 | struct mount *old; |
1836 | struct mountpoint *mp; | ||
1789 | int err; | 1837 | int err; |
1790 | if (!old_name || !*old_name) | 1838 | if (!old_name || !*old_name) |
1791 | return -EINVAL; | 1839 | return -EINVAL; |
@@ -1793,8 +1841,9 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1793 | if (err) | 1841 | if (err) |
1794 | return err; | 1842 | return err; |
1795 | 1843 | ||
1796 | err = lock_mount(path); | 1844 | mp = lock_mount(path); |
1797 | if (err < 0) | 1845 | err = PTR_ERR(mp); |
1846 | if (IS_ERR(mp)) | ||
1798 | goto out; | 1847 | goto out; |
1799 | 1848 | ||
1800 | old = real_mount(old_path.mnt); | 1849 | old = real_mount(old_path.mnt); |
@@ -1804,9 +1853,6 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1804 | if (!check_mnt(p) || !check_mnt(old)) | 1853 | if (!check_mnt(p) || !check_mnt(old)) |
1805 | goto out1; | 1854 | goto out1; |
1806 | 1855 | ||
1807 | if (d_unlinked(path->dentry)) | ||
1808 | goto out1; | ||
1809 | |||
1810 | err = -EINVAL; | 1856 | err = -EINVAL; |
1811 | if (old_path.dentry != old_path.mnt->mnt_root) | 1857 | if (old_path.dentry != old_path.mnt->mnt_root) |
1812 | goto out1; | 1858 | goto out1; |
@@ -1833,7 +1879,7 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1833 | if (p == old) | 1879 | if (p == old) |
1834 | goto out1; | 1880 | goto out1; |
1835 | 1881 | ||
1836 | err = attach_recursive_mnt(old, path, &parent_path); | 1882 | err = attach_recursive_mnt(old, real_mount(path->mnt), mp, &parent_path); |
1837 | if (err) | 1883 | if (err) |
1838 | goto out1; | 1884 | goto out1; |
1839 | 1885 | ||
@@ -1841,7 +1887,7 @@ static int do_move_mount(struct path *path, const char *old_name) | |||
1841 | * automatically */ | 1887 | * automatically */ |
1842 | list_del_init(&old->mnt_expire); | 1888 | list_del_init(&old->mnt_expire); |
1843 | out1: | 1889 | out1: |
1844 | unlock_mount(path); | 1890 | unlock_mount(mp); |
1845 | out: | 1891 | out: |
1846 | if (!err) | 1892 | if (!err) |
1847 | path_put(&parent_path); | 1893 | path_put(&parent_path); |
@@ -1877,21 +1923,24 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) | |||
1877 | */ | 1923 | */ |
1878 | static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) | 1924 | static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) |
1879 | { | 1925 | { |
1926 | struct mountpoint *mp; | ||
1927 | struct mount *parent; | ||
1880 | int err; | 1928 | int err; |
1881 | 1929 | ||
1882 | mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); | 1930 | mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL); |
1883 | 1931 | ||
1884 | err = lock_mount(path); | 1932 | mp = lock_mount(path); |
1885 | if (err) | 1933 | if (IS_ERR(mp)) |
1886 | return err; | 1934 | return PTR_ERR(mp); |
1887 | 1935 | ||
1936 | parent = real_mount(path->mnt); | ||
1888 | err = -EINVAL; | 1937 | err = -EINVAL; |
1889 | if (unlikely(!check_mnt(real_mount(path->mnt)))) { | 1938 | if (unlikely(!check_mnt(parent))) { |
1890 | /* that's acceptable only for automounts done in private ns */ | 1939 | /* that's acceptable only for automounts done in private ns */ |
1891 | if (!(mnt_flags & MNT_SHRINKABLE)) | 1940 | if (!(mnt_flags & MNT_SHRINKABLE)) |
1892 | goto unlock; | 1941 | goto unlock; |
1893 | /* ... and for those we'd better have mountpoint still alive */ | 1942 | /* ... and for those we'd better have mountpoint still alive */ |
1894 | if (!real_mount(path->mnt)->mnt_ns) | 1943 | if (!parent->mnt_ns) |
1895 | goto unlock; | 1944 | goto unlock; |
1896 | } | 1945 | } |
1897 | 1946 | ||
@@ -1906,10 +1955,10 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags) | |||
1906 | goto unlock; | 1955 | goto unlock; |
1907 | 1956 | ||
1908 | newmnt->mnt.mnt_flags = mnt_flags; | 1957 | newmnt->mnt.mnt_flags = mnt_flags; |
1909 | err = graft_tree(newmnt, path); | 1958 | err = graft_tree(newmnt, parent, mp); |
1910 | 1959 | ||
1911 | unlock: | 1960 | unlock: |
1912 | unlock_mount(path); | 1961 | unlock_mount(mp); |
1913 | return err; | 1962 | return err; |
1914 | } | 1963 | } |
1915 | 1964 | ||
@@ -1982,11 +2031,11 @@ int finish_automount(struct vfsmount *m, struct path *path) | |||
1982 | fail: | 2031 | fail: |
1983 | /* remove m from any expiration list it may be on */ | 2032 | /* remove m from any expiration list it may be on */ |
1984 | if (!list_empty(&mnt->mnt_expire)) { | 2033 | if (!list_empty(&mnt->mnt_expire)) { |
1985 | down_write(&namespace_sem); | 2034 | namespace_lock(); |
1986 | br_write_lock(&vfsmount_lock); | 2035 | br_write_lock(&vfsmount_lock); |
1987 | list_del_init(&mnt->mnt_expire); | 2036 | list_del_init(&mnt->mnt_expire); |
1988 | br_write_unlock(&vfsmount_lock); | 2037 | br_write_unlock(&vfsmount_lock); |
1989 | up_write(&namespace_sem); | 2038 | namespace_unlock(); |
1990 | } | 2039 | } |
1991 | mntput(m); | 2040 | mntput(m); |
1992 | mntput(m); | 2041 | mntput(m); |
@@ -2000,13 +2049,13 @@ fail: | |||
2000 | */ | 2049 | */ |
2001 | void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) | 2050 | void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) |
2002 | { | 2051 | { |
2003 | down_write(&namespace_sem); | 2052 | namespace_lock(); |
2004 | br_write_lock(&vfsmount_lock); | 2053 | br_write_lock(&vfsmount_lock); |
2005 | 2054 | ||
2006 | list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); | 2055 | list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); |
2007 | 2056 | ||
2008 | br_write_unlock(&vfsmount_lock); | 2057 | br_write_unlock(&vfsmount_lock); |
2009 | up_write(&namespace_sem); | 2058 | namespace_unlock(); |
2010 | } | 2059 | } |
2011 | EXPORT_SYMBOL(mnt_set_expiry); | 2060 | EXPORT_SYMBOL(mnt_set_expiry); |
2012 | 2061 | ||
@@ -2019,12 +2068,11 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
2019 | { | 2068 | { |
2020 | struct mount *mnt, *next; | 2069 | struct mount *mnt, *next; |
2021 | LIST_HEAD(graveyard); | 2070 | LIST_HEAD(graveyard); |
2022 | LIST_HEAD(umounts); | ||
2023 | 2071 | ||
2024 | if (list_empty(mounts)) | 2072 | if (list_empty(mounts)) |
2025 | return; | 2073 | return; |
2026 | 2074 | ||
2027 | down_write(&namespace_sem); | 2075 | namespace_lock(); |
2028 | br_write_lock(&vfsmount_lock); | 2076 | br_write_lock(&vfsmount_lock); |
2029 | 2077 | ||
2030 | /* extract from the expiration list every vfsmount that matches the | 2078 | /* extract from the expiration list every vfsmount that matches the |
@@ -2042,12 +2090,10 @@ void mark_mounts_for_expiry(struct list_head *mounts) | |||
2042 | while (!list_empty(&graveyard)) { | 2090 | while (!list_empty(&graveyard)) { |
2043 | mnt = list_first_entry(&graveyard, struct mount, mnt_expire); | 2091 | mnt = list_first_entry(&graveyard, struct mount, mnt_expire); |
2044 | touch_mnt_namespace(mnt->mnt_ns); | 2092 | touch_mnt_namespace(mnt->mnt_ns); |
2045 | umount_tree(mnt, 1, &umounts); | 2093 | umount_tree(mnt, 1); |
2046 | } | 2094 | } |
2047 | br_write_unlock(&vfsmount_lock); | 2095 | br_write_unlock(&vfsmount_lock); |
2048 | up_write(&namespace_sem); | 2096 | namespace_unlock(); |
2049 | |||
2050 | release_mounts(&umounts); | ||
2051 | } | 2097 | } |
2052 | 2098 | ||
2053 | EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); | 2099 | EXPORT_SYMBOL_GPL(mark_mounts_for_expiry); |
@@ -2104,7 +2150,7 @@ resume: | |||
2104 | * | 2150 | * |
2105 | * vfsmount_lock must be held for write | 2151 | * vfsmount_lock must be held for write |
2106 | */ | 2152 | */ |
2107 | static void shrink_submounts(struct mount *mnt, struct list_head *umounts) | 2153 | static void shrink_submounts(struct mount *mnt) |
2108 | { | 2154 | { |
2109 | LIST_HEAD(graveyard); | 2155 | LIST_HEAD(graveyard); |
2110 | struct mount *m; | 2156 | struct mount *m; |
@@ -2115,7 +2161,7 @@ static void shrink_submounts(struct mount *mnt, struct list_head *umounts) | |||
2115 | m = list_first_entry(&graveyard, struct mount, | 2161 | m = list_first_entry(&graveyard, struct mount, |
2116 | mnt_expire); | 2162 | mnt_expire); |
2117 | touch_mnt_namespace(m->mnt_ns); | 2163 | touch_mnt_namespace(m->mnt_ns); |
2118 | umount_tree(m, 1, umounts); | 2164 | umount_tree(m, 1); |
2119 | } | 2165 | } |
2120 | } | 2166 | } |
2121 | } | 2167 | } |
@@ -2342,14 +2388,14 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2342 | if (IS_ERR(new_ns)) | 2388 | if (IS_ERR(new_ns)) |
2343 | return new_ns; | 2389 | return new_ns; |
2344 | 2390 | ||
2345 | down_write(&namespace_sem); | 2391 | namespace_lock(); |
2346 | /* First pass: copy the tree topology */ | 2392 | /* First pass: copy the tree topology */ |
2347 | copy_flags = CL_COPY_ALL | CL_EXPIRE; | 2393 | copy_flags = CL_COPY_ALL | CL_EXPIRE; |
2348 | if (user_ns != mnt_ns->user_ns) | 2394 | if (user_ns != mnt_ns->user_ns) |
2349 | copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; | 2395 | copy_flags |= CL_SHARED_TO_SLAVE | CL_UNPRIVILEGED; |
2350 | new = copy_tree(old, old->mnt.mnt_root, copy_flags); | 2396 | new = copy_tree(old, old->mnt.mnt_root, copy_flags); |
2351 | if (IS_ERR(new)) { | 2397 | if (IS_ERR(new)) { |
2352 | up_write(&namespace_sem); | 2398 | namespace_unlock(); |
2353 | free_mnt_ns(new_ns); | 2399 | free_mnt_ns(new_ns); |
2354 | return ERR_CAST(new); | 2400 | return ERR_CAST(new); |
2355 | } | 2401 | } |
@@ -2380,7 +2426,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, | |||
2380 | p = next_mnt(p, old); | 2426 | p = next_mnt(p, old); |
2381 | q = next_mnt(q, new); | 2427 | q = next_mnt(q, new); |
2382 | } | 2428 | } |
2383 | up_write(&namespace_sem); | 2429 | namespace_unlock(); |
2384 | 2430 | ||
2385 | if (rootmnt) | 2431 | if (rootmnt) |
2386 | mntput(rootmnt); | 2432 | mntput(rootmnt); |
@@ -2550,7 +2596,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2550 | const char __user *, put_old) | 2596 | const char __user *, put_old) |
2551 | { | 2597 | { |
2552 | struct path new, old, parent_path, root_parent, root; | 2598 | struct path new, old, parent_path, root_parent, root; |
2553 | struct mount *new_mnt, *root_mnt; | 2599 | struct mount *new_mnt, *root_mnt, *old_mnt; |
2600 | struct mountpoint *old_mp, *root_mp; | ||
2554 | int error; | 2601 | int error; |
2555 | 2602 | ||
2556 | if (!may_mount()) | 2603 | if (!may_mount()) |
@@ -2569,14 +2616,16 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2569 | goto out2; | 2616 | goto out2; |
2570 | 2617 | ||
2571 | get_fs_root(current->fs, &root); | 2618 | get_fs_root(current->fs, &root); |
2572 | error = lock_mount(&old); | 2619 | old_mp = lock_mount(&old); |
2573 | if (error) | 2620 | error = PTR_ERR(old_mp); |
2621 | if (IS_ERR(old_mp)) | ||
2574 | goto out3; | 2622 | goto out3; |
2575 | 2623 | ||
2576 | error = -EINVAL; | 2624 | error = -EINVAL; |
2577 | new_mnt = real_mount(new.mnt); | 2625 | new_mnt = real_mount(new.mnt); |
2578 | root_mnt = real_mount(root.mnt); | 2626 | root_mnt = real_mount(root.mnt); |
2579 | if (IS_MNT_SHARED(real_mount(old.mnt)) || | 2627 | old_mnt = real_mount(old.mnt); |
2628 | if (IS_MNT_SHARED(old_mnt) || | ||
2580 | IS_MNT_SHARED(new_mnt->mnt_parent) || | 2629 | IS_MNT_SHARED(new_mnt->mnt_parent) || |
2581 | IS_MNT_SHARED(root_mnt->mnt_parent)) | 2630 | IS_MNT_SHARED(root_mnt->mnt_parent)) |
2582 | goto out4; | 2631 | goto out4; |
@@ -2585,37 +2634,37 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, | |||
2585 | error = -ENOENT; | 2634 | error = -ENOENT; |
2586 | if (d_unlinked(new.dentry)) | 2635 | if (d_unlinked(new.dentry)) |
2587 | goto out4; | 2636 | goto out4; |
2588 | if (d_unlinked(old.dentry)) | ||
2589 | goto out4; | ||
2590 | error = -EBUSY; | 2637 | error = -EBUSY; |
2591 | if (new.mnt == root.mnt || | 2638 | if (new_mnt == root_mnt || old_mnt == root_mnt) |
2592 | old.mnt == root.mnt) | ||
2593 | goto out4; /* loop, on the same file system */ | 2639 | goto out4; /* loop, on the same file system */ |
2594 | error = -EINVAL; | 2640 | error = -EINVAL; |
2595 | if (root.mnt->mnt_root != root.dentry) | 2641 | if (root.mnt->mnt_root != root.dentry) |
2596 | goto out4; /* not a mountpoint */ | 2642 | goto out4; /* not a mountpoint */ |
2597 | if (!mnt_has_parent(root_mnt)) | 2643 | if (!mnt_has_parent(root_mnt)) |
2598 | goto out4; /* not attached */ | 2644 | goto out4; /* not attached */ |
2645 | root_mp = root_mnt->mnt_mp; | ||
2599 | if (new.mnt->mnt_root != new.dentry) | 2646 | if (new.mnt->mnt_root != new.dentry) |
2600 | goto out4; /* not a mountpoint */ | 2647 | goto out4; /* not a mountpoint */ |
2601 | if (!mnt_has_parent(new_mnt)) | 2648 | if (!mnt_has_parent(new_mnt)) |
2602 | goto out4; /* not attached */ | 2649 | goto out4; /* not attached */ |
2603 | /* make sure we can reach put_old from new_root */ | 2650 | /* make sure we can reach put_old from new_root */ |
2604 | if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) | 2651 | if (!is_path_reachable(old_mnt, old.dentry, &new)) |
2605 | goto out4; | 2652 | goto out4; |
2653 | root_mp->m_count++; /* pin it so it won't go away */ | ||
2606 | br_write_lock(&vfsmount_lock); | 2654 | br_write_lock(&vfsmount_lock); |
2607 | detach_mnt(new_mnt, &parent_path); | 2655 | detach_mnt(new_mnt, &parent_path); |
2608 | detach_mnt(root_mnt, &root_parent); | 2656 | detach_mnt(root_mnt, &root_parent); |
2609 | /* mount old root on put_old */ | 2657 | /* mount old root on put_old */ |
2610 | attach_mnt(root_mnt, &old); | 2658 | attach_mnt(root_mnt, old_mnt, old_mp); |
2611 | /* mount new_root on / */ | 2659 | /* mount new_root on / */ |
2612 | attach_mnt(new_mnt, &root_parent); | 2660 | attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); |
2613 | touch_mnt_namespace(current->nsproxy->mnt_ns); | 2661 | touch_mnt_namespace(current->nsproxy->mnt_ns); |
2614 | br_write_unlock(&vfsmount_lock); | 2662 | br_write_unlock(&vfsmount_lock); |
2615 | chroot_fs_refs(&root, &new); | 2663 | chroot_fs_refs(&root, &new); |
2664 | put_mountpoint(root_mp); | ||
2616 | error = 0; | 2665 | error = 0; |
2617 | out4: | 2666 | out4: |
2618 | unlock_mount(&old); | 2667 | unlock_mount(old_mp); |
2619 | if (!error) { | 2668 | if (!error) { |
2620 | path_put(&root_parent); | 2669 | path_put(&root_parent); |
2621 | path_put(&parent_path); | 2670 | path_put(&parent_path); |
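Note (editor): pivot_root() now juggles mountpoints directly: the old root's mountpoint is pinned with an extra m_count reference before both mounts are detached, so it cannot be freed while new_root is reattached onto it. Condensed to its essentials from the hunk above (editor's sketch; error handling and unrelated checks dropped):

	root_mp = root_mnt->mnt_mp;			/* where "/" is currently attached */
	root_mp->m_count++;				/* pin: detach_mnt(root_mnt) below drops root_mnt's reference */
	detach_mnt(new_mnt, &parent_path);		/* new_root leaves its old mountpoint */
	detach_mnt(root_mnt, &root_parent);		/* old root leaves "/"; root_mp survives only via the pin */
	attach_mnt(root_mnt, old_mnt, old_mp);		/* old root reattached under put_old (old_mp pinned by lock_mount) */
	attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);	/* new_root takes the old root's place */
	put_mountpoint(root_mp);			/* drop the pin; new_mnt now holds the reference */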
@@ -2670,14 +2719,17 @@ void __init mnt_init(void) | |||
2670 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); | 2719 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
2671 | 2720 | ||
2672 | mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); | 2721 | mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); |
2722 | mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); | ||
2673 | 2723 | ||
2674 | if (!mount_hashtable) | 2724 | if (!mount_hashtable || !mountpoint_hashtable) |
2675 | panic("Failed to allocate mount hash table\n"); | 2725 | panic("Failed to allocate mount hash table\n"); |
2676 | 2726 | ||
2677 | printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); | 2727 | printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); |
2678 | 2728 | ||
2679 | for (u = 0; u < HASH_SIZE; u++) | 2729 | for (u = 0; u < HASH_SIZE; u++) |
2680 | INIT_LIST_HEAD(&mount_hashtable[u]); | 2730 | INIT_LIST_HEAD(&mount_hashtable[u]); |
2731 | for (u = 0; u < HASH_SIZE; u++) | ||
2732 | INIT_LIST_HEAD(&mountpoint_hashtable[u]); | ||
2681 | 2733 | ||
2682 | br_lock_init(&vfsmount_lock); | 2734 | br_lock_init(&vfsmount_lock); |
2683 | 2735 | ||
@@ -2694,16 +2746,13 @@ void __init mnt_init(void) | |||
2694 | 2746 | ||
2695 | void put_mnt_ns(struct mnt_namespace *ns) | 2747 | void put_mnt_ns(struct mnt_namespace *ns) |
2696 | { | 2748 | { |
2697 | LIST_HEAD(umount_list); | ||
2698 | |||
2699 | if (!atomic_dec_and_test(&ns->count)) | 2749 | if (!atomic_dec_and_test(&ns->count)) |
2700 | return; | 2750 | return; |
2701 | down_write(&namespace_sem); | 2751 | namespace_lock(); |
2702 | br_write_lock(&vfsmount_lock); | 2752 | br_write_lock(&vfsmount_lock); |
2703 | umount_tree(ns->root, 0, &umount_list); | 2753 | umount_tree(ns->root, 0); |
2704 | br_write_unlock(&vfsmount_lock); | 2754 | br_write_unlock(&vfsmount_lock); |
2705 | up_write(&namespace_sem); | 2755 | namespace_unlock(); |
2706 | release_mounts(&umount_list); | ||
2707 | free_mnt_ns(ns); | 2756 | free_mnt_ns(ns); |
2708 | } | 2757 | } |
2709 | 2758 | ||
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f33455b4d957..5bee0313dffd 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -177,7 +177,7 @@ static int export_features_open(struct inode *inode, struct file *file) | |||
177 | return single_open(file, export_features_show, NULL); | 177 | return single_open(file, export_features_show, NULL); |
178 | } | 178 | } |
179 | 179 | ||
180 | static struct file_operations export_features_operations = { | 180 | static const struct file_operations export_features_operations = { |
181 | .open = export_features_open, | 181 | .open = export_features_open, |
182 | .read = seq_read, | 182 | .read = seq_read, |
183 | .llseek = seq_lseek, | 183 | .llseek = seq_lseek, |
@@ -196,7 +196,7 @@ static int supported_enctypes_open(struct inode *inode, struct file *file) | |||
196 | return single_open(file, supported_enctypes_show, NULL); | 196 | return single_open(file, supported_enctypes_show, NULL); |
197 | } | 197 | } |
198 | 198 | ||
199 | static struct file_operations supported_enctypes_ops = { | 199 | static const struct file_operations supported_enctypes_ops = { |
200 | .open = supported_enctypes_open, | 200 | .open = supported_enctypes_open, |
201 | .read = seq_read, | 201 | .read = seq_read, |
202 | .llseek = seq_lseek, | 202 | .llseek = seq_lseek, |
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index c616a70e8cf9..959815c1e017 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -287,9 +287,6 @@ static int inotify_release(struct inode *ignored, struct file *file) | |||
287 | 287 | ||
288 | pr_debug("%s: group=%p\n", __func__, group); | 288 | pr_debug("%s: group=%p\n", __func__, group); |
289 | 289 | ||
290 | if (file->f_flags & FASYNC) | ||
291 | fsnotify_fasync(-1, file, 0); | ||
292 | |||
293 | /* free this group, matching get was inotify_init->fsnotify_obtain_group */ | 290 | /* free this group, matching get was inotify_init->fsnotify_obtain_group */ |
294 | fsnotify_destroy_group(group); | 291 | fsnotify_destroy_group(group); |
295 | 292 | ||
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 5b2d4f0853ac..1da4b81e6f76 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
@@ -2129,7 +2129,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2129 | 2129 | ||
2130 | BUG_ON(iocb->ki_pos != pos); | 2130 | BUG_ON(iocb->ki_pos != pos); |
2131 | 2131 | ||
2132 | sb_start_write(inode->i_sb); | ||
2133 | mutex_lock(&inode->i_mutex); | 2132 | mutex_lock(&inode->i_mutex); |
2134 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); | 2133 | ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); |
2135 | mutex_unlock(&inode->i_mutex); | 2134 | mutex_unlock(&inode->i_mutex); |
@@ -2138,7 +2137,6 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
2138 | if (err < 0) | 2137 | if (err < 0) |
2139 | ret = err; | 2138 | ret = err; |
2140 | } | 2139 | } |
2141 | sb_end_write(inode->i_sb); | ||
2142 | return ret; | 2140 | return ret; |
2143 | } | 2141 | } |
2144 | 2142 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6474cb44004d..8a7509f9e6f5 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -2248,8 +2248,6 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
2248 | if (iocb->ki_left == 0) | 2248 | if (iocb->ki_left == 0) |
2249 | return 0; | 2249 | return 0; |
2250 | 2250 | ||
2251 | sb_start_write(inode->i_sb); | ||
2252 | |||
2253 | appending = file->f_flags & O_APPEND ? 1 : 0; | 2251 | appending = file->f_flags & O_APPEND ? 1 : 0; |
2254 | direct_io = file->f_flags & O_DIRECT ? 1 : 0; | 2252 | direct_io = file->f_flags & O_DIRECT ? 1 : 0; |
2255 | 2253 | ||
@@ -2423,7 +2421,6 @@ out_sems: | |||
2423 | ocfs2_iocb_clear_sem_locked(iocb); | 2421 | ocfs2_iocb_clear_sem_locked(iocb); |
2424 | 2422 | ||
2425 | mutex_unlock(&inode->i_mutex); | 2423 | mutex_unlock(&inode->i_mutex); |
2426 | sb_end_write(inode->i_sb); | ||
2427 | 2424 | ||
2428 | if (written) | 2425 | if (written) |
2429 | ret = written; | 2426 | ret = written; |
@@ -2468,8 +2465,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
2468 | out->f_path.dentry->d_name.len, | 2465 | out->f_path.dentry->d_name.len, |
2469 | out->f_path.dentry->d_name.name, len); | 2466 | out->f_path.dentry->d_name.name, len); |
2470 | 2467 | ||
2471 | if (pipe->inode) | 2468 | pipe_lock(pipe); |
2472 | mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); | ||
2473 | 2469 | ||
2474 | splice_from_pipe_begin(&sd); | 2470 | splice_from_pipe_begin(&sd); |
2475 | do { | 2471 | do { |
@@ -2489,8 +2485,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, | |||
2489 | } while (ret > 0); | 2485 | } while (ret > 0); |
2490 | splice_from_pipe_end(pipe, &sd); | 2486 | splice_from_pipe_end(pipe, &sd); |
2491 | 2487 | ||
2492 | if (pipe->inode) | 2488 | pipe_unlock(pipe); |
2493 | mutex_unlock(&pipe->inode->i_mutex); | ||
2494 | 2489 | ||
2495 | if (sd.num_spliced) | 2490 | if (sd.num_spliced) |
2496 | ret = sd.num_spliced; | 2491 | ret = sd.num_spliced; |
diff --git a/fs/pipe.c b/fs/pipe.c --- a/fs/pipe.c +++ b/fs/pipe.c | |||
@@ -25,6 +25,8 @@ | |||
25 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
26 | #include <asm/ioctls.h> | 26 | #include <asm/ioctls.h> |
27 | 27 | ||
28 | #include "internal.h" | ||
29 | |||
28 | /* | 30 | /* |
29 | * The max size that a non-root user is allowed to grow the pipe. Can | 31 | * The max size that a non-root user is allowed to grow the pipe. Can |
30 | * be set by root in /proc/sys/fs/pipe-max-size | 32 | * be set by root in /proc/sys/fs/pipe-max-size |
@@ -53,8 +55,8 @@ unsigned int pipe_min_size = PAGE_SIZE; | |||
53 | 55 | ||
54 | static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) | 56 | static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) |
55 | { | 57 | { |
56 | if (pipe->inode) | 58 | if (pipe->files) |
57 | mutex_lock_nested(&pipe->inode->i_mutex, subclass); | 59 | mutex_lock_nested(&pipe->mutex, subclass); |
58 | } | 60 | } |
59 | 61 | ||
60 | void pipe_lock(struct pipe_inode_info *pipe) | 62 | void pipe_lock(struct pipe_inode_info *pipe) |
@@ -68,11 +70,21 @@ EXPORT_SYMBOL(pipe_lock); | |||
68 | 70 | ||
69 | void pipe_unlock(struct pipe_inode_info *pipe) | 71 | void pipe_unlock(struct pipe_inode_info *pipe) |
70 | { | 72 | { |
71 | if (pipe->inode) | 73 | if (pipe->files) |
72 | mutex_unlock(&pipe->inode->i_mutex); | 74 | mutex_unlock(&pipe->mutex); |
73 | } | 75 | } |
74 | EXPORT_SYMBOL(pipe_unlock); | 76 | EXPORT_SYMBOL(pipe_unlock); |
75 | 77 | ||
78 | static inline void __pipe_lock(struct pipe_inode_info *pipe) | ||
79 | { | ||
80 | mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); | ||
81 | } | ||
82 | |||
83 | static inline void __pipe_unlock(struct pipe_inode_info *pipe) | ||
84 | { | ||
85 | mutex_unlock(&pipe->mutex); | ||
86 | } | ||
87 | |||
76 | void pipe_double_lock(struct pipe_inode_info *pipe1, | 88 | void pipe_double_lock(struct pipe_inode_info *pipe1, |
77 | struct pipe_inode_info *pipe2) | 89 | struct pipe_inode_info *pipe2) |
78 | { | 90 | { |
@@ -361,8 +373,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, | |||
361 | unsigned long nr_segs, loff_t pos) | 373 | unsigned long nr_segs, loff_t pos) |
362 | { | 374 | { |
363 | struct file *filp = iocb->ki_filp; | 375 | struct file *filp = iocb->ki_filp; |
364 | struct inode *inode = file_inode(filp); | 376 | struct pipe_inode_info *pipe = filp->private_data; |
365 | struct pipe_inode_info *pipe; | ||
366 | int do_wakeup; | 377 | int do_wakeup; |
367 | ssize_t ret; | 378 | ssize_t ret; |
368 | struct iovec *iov = (struct iovec *)_iov; | 379 | struct iovec *iov = (struct iovec *)_iov; |
@@ -375,8 +386,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, | |||
375 | 386 | ||
376 | do_wakeup = 0; | 387 | do_wakeup = 0; |
377 | ret = 0; | 388 | ret = 0; |
378 | mutex_lock(&inode->i_mutex); | 389 | __pipe_lock(pipe); |
379 | pipe = inode->i_pipe; | ||
380 | for (;;) { | 390 | for (;;) { |
381 | int bufs = pipe->nrbufs; | 391 | int bufs = pipe->nrbufs; |
382 | if (bufs) { | 392 | if (bufs) { |
@@ -464,7 +474,7 @@ redo: | |||
464 | } | 474 | } |
465 | pipe_wait(pipe); | 475 | pipe_wait(pipe); |
466 | } | 476 | } |
467 | mutex_unlock(&inode->i_mutex); | 477 | __pipe_unlock(pipe); |
468 | 478 | ||
469 | /* Signal writers asynchronously that there is more room. */ | 479 | /* Signal writers asynchronously that there is more room. */ |
470 | if (do_wakeup) { | 480 | if (do_wakeup) { |
@@ -486,8 +496,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, | |||
486 | unsigned long nr_segs, loff_t ppos) | 496 | unsigned long nr_segs, loff_t ppos) |
487 | { | 497 | { |
488 | struct file *filp = iocb->ki_filp; | 498 | struct file *filp = iocb->ki_filp; |
489 | struct inode *inode = file_inode(filp); | 499 | struct pipe_inode_info *pipe = filp->private_data; |
490 | struct pipe_inode_info *pipe; | ||
491 | ssize_t ret; | 500 | ssize_t ret; |
492 | int do_wakeup; | 501 | int do_wakeup; |
493 | struct iovec *iov = (struct iovec *)_iov; | 502 | struct iovec *iov = (struct iovec *)_iov; |
@@ -501,8 +510,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, | |||
501 | 510 | ||
502 | do_wakeup = 0; | 511 | do_wakeup = 0; |
503 | ret = 0; | 512 | ret = 0; |
504 | mutex_lock(&inode->i_mutex); | 513 | __pipe_lock(pipe); |
505 | pipe = inode->i_pipe; | ||
506 | 514 | ||
507 | if (!pipe->readers) { | 515 | if (!pipe->readers) { |
508 | send_sig(SIGPIPE, current, 0); | 516 | send_sig(SIGPIPE, current, 0); |
@@ -649,7 +657,7 @@ redo2: | |||
649 | pipe->waiting_writers--; | 657 | pipe->waiting_writers--; |
650 | } | 658 | } |
651 | out: | 659 | out: |
652 | mutex_unlock(&inode->i_mutex); | 660 | __pipe_unlock(pipe); |
653 | if (do_wakeup) { | 661 | if (do_wakeup) { |
654 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); | 662 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); |
655 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 663 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
@@ -662,29 +670,14 @@ out: | |||
662 | return ret; | 670 | return ret; |
663 | } | 671 | } |
664 | 672 | ||
665 | static ssize_t | ||
666 | bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos) | ||
667 | { | ||
668 | return -EBADF; | ||
669 | } | ||
670 | |||
671 | static ssize_t | ||
672 | bad_pipe_w(struct file *filp, const char __user *buf, size_t count, | ||
673 | loff_t *ppos) | ||
674 | { | ||
675 | return -EBADF; | ||
676 | } | ||
677 | |||
678 | static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 673 | static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
679 | { | 674 | { |
680 | struct inode *inode = file_inode(filp); | 675 | struct pipe_inode_info *pipe = filp->private_data; |
681 | struct pipe_inode_info *pipe; | ||
682 | int count, buf, nrbufs; | 676 | int count, buf, nrbufs; |
683 | 677 | ||
684 | switch (cmd) { | 678 | switch (cmd) { |
685 | case FIONREAD: | 679 | case FIONREAD: |
686 | mutex_lock(&inode->i_mutex); | 680 | __pipe_lock(pipe); |
687 | pipe = inode->i_pipe; | ||
688 | count = 0; | 681 | count = 0; |
689 | buf = pipe->curbuf; | 682 | buf = pipe->curbuf; |
690 | nrbufs = pipe->nrbufs; | 683 | nrbufs = pipe->nrbufs; |
@@ -692,7 +685,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
692 | count += pipe->bufs[buf].len; | 685 | count += pipe->bufs[buf].len; |
693 | buf = (buf+1) & (pipe->buffers - 1); | 686 | buf = (buf+1) & (pipe->buffers - 1); |
694 | } | 687 | } |
695 | mutex_unlock(&inode->i_mutex); | 688 | __pipe_unlock(pipe); |
696 | 689 | ||
697 | return put_user(count, (int __user *)arg); | 690 | return put_user(count, (int __user *)arg); |
698 | default: | 691 | default: |
@@ -705,8 +698,7 @@ static unsigned int | |||
705 | pipe_poll(struct file *filp, poll_table *wait) | 698 | pipe_poll(struct file *filp, poll_table *wait) |
706 | { | 699 | { |
707 | unsigned int mask; | 700 | unsigned int mask; |
708 | struct inode *inode = file_inode(filp); | 701 | struct pipe_inode_info *pipe = filp->private_data; |
709 | struct pipe_inode_info *pipe = inode->i_pipe; | ||
710 | int nrbufs; | 702 | int nrbufs; |
711 | 703 | ||
712 | poll_wait(filp, &pipe->wait, wait); | 704 | poll_wait(filp, &pipe->wait, wait); |
@@ -734,197 +726,56 @@ pipe_poll(struct file *filp, poll_table *wait) | |||
734 | } | 726 | } |
735 | 727 | ||
736 | static int | 728 | static int |
737 | pipe_release(struct inode *inode, int decr, int decw) | 729 | pipe_release(struct inode *inode, struct file *file) |
738 | { | 730 | { |
739 | struct pipe_inode_info *pipe; | 731 | struct pipe_inode_info *pipe = inode->i_pipe; |
732 | int kill = 0; | ||
740 | 733 | ||
741 | mutex_lock(&inode->i_mutex); | 734 | __pipe_lock(pipe); |
742 | pipe = inode->i_pipe; | 735 | if (file->f_mode & FMODE_READ) |
743 | pipe->readers -= decr; | 736 | pipe->readers--; |
744 | pipe->writers -= decw; | 737 | if (file->f_mode & FMODE_WRITE) |
738 | pipe->writers--; | ||
745 | 739 | ||
746 | if (!pipe->readers && !pipe->writers) { | 740 | if (pipe->readers || pipe->writers) { |
747 | free_pipe_info(inode); | ||
748 | } else { | ||
749 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP); | 741 | wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP); |
750 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | 742 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); |
751 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); | 743 | kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); |
752 | } | 744 | } |
753 | mutex_unlock(&inode->i_mutex); | 745 | spin_lock(&inode->i_lock); |
754 | 746 | if (!--pipe->files) { | |
755 | return 0; | 747 | inode->i_pipe = NULL; |
756 | } | 748 | kill = 1; |
757 | 749 | } | |
758 | static int | 750 | spin_unlock(&inode->i_lock); |
759 | pipe_read_fasync(int fd, struct file *filp, int on) | 751 | __pipe_unlock(pipe); |
760 | { | ||
761 | struct inode *inode = file_inode(filp); | ||
762 | int retval; | ||
763 | |||
764 | mutex_lock(&inode->i_mutex); | ||
765 | retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers); | ||
766 | mutex_unlock(&inode->i_mutex); | ||
767 | |||
768 | return retval; | ||
769 | } | ||
770 | |||
771 | |||
772 | static int | ||
773 | pipe_write_fasync(int fd, struct file *filp, int on) | ||
774 | { | ||
775 | struct inode *inode = file_inode(filp); | ||
776 | int retval; | ||
777 | 752 | ||
778 | mutex_lock(&inode->i_mutex); | 753 | if (kill) |
779 | retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers); | 754 | free_pipe_info(pipe); |
780 | mutex_unlock(&inode->i_mutex); | ||
781 | 755 | ||
782 | return retval; | 756 | return 0; |
783 | } | 757 | } |
784 | 758 | ||
785 | |||
786 | static int | 759 | static int |
787 | pipe_rdwr_fasync(int fd, struct file *filp, int on) | 760 | pipe_fasync(int fd, struct file *filp, int on) |
788 | { | 761 | { |
789 | struct inode *inode = file_inode(filp); | 762 | struct pipe_inode_info *pipe = filp->private_data; |
790 | struct pipe_inode_info *pipe = inode->i_pipe; | 763 | int retval = 0; |
791 | int retval; | ||
792 | 764 | ||
793 | mutex_lock(&inode->i_mutex); | 765 | __pipe_lock(pipe); |
794 | retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); | 766 | if (filp->f_mode & FMODE_READ) |
795 | if (retval >= 0) { | 767 | retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); |
768 | if ((filp->f_mode & FMODE_WRITE) && retval >= 0) { | ||
796 | retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); | 769 | retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); |
797 | if (retval < 0) /* this can happen only if on == T */ | 770 | if (retval < 0 && (filp->f_mode & FMODE_READ)) |
771 | /* this can happen only if on == T */ | ||
798 | fasync_helper(-1, filp, 0, &pipe->fasync_readers); | 772 | fasync_helper(-1, filp, 0, &pipe->fasync_readers); |
799 | } | 773 | } |
800 | mutex_unlock(&inode->i_mutex); | 774 | __pipe_unlock(pipe); |
801 | return retval; | 775 | return retval; |
802 | } | 776 | } |
803 | 777 | ||
804 | 778 | struct pipe_inode_info *alloc_pipe_info(void) | |
805 | static int | ||
806 | pipe_read_release(struct inode *inode, struct file *filp) | ||
807 | { | ||
808 | return pipe_release(inode, 1, 0); | ||
809 | } | ||
810 | |||
811 | static int | ||
812 | pipe_write_release(struct inode *inode, struct file *filp) | ||
813 | { | ||
814 | return pipe_release(inode, 0, 1); | ||
815 | } | ||
816 | |||
817 | static int | ||
818 | pipe_rdwr_release(struct inode *inode, struct file *filp) | ||
819 | { | ||
820 | int decr, decw; | ||
821 | |||
822 | decr = (filp->f_mode & FMODE_READ) != 0; | ||
823 | decw = (filp->f_mode & FMODE_WRITE) != 0; | ||
824 | return pipe_release(inode, decr, decw); | ||
825 | } | ||
826 | |||
827 | static int | ||
828 | pipe_read_open(struct inode *inode, struct file *filp) | ||
829 | { | ||
830 | int ret = -ENOENT; | ||
831 | |||
832 | mutex_lock(&inode->i_mutex); | ||
833 | |||
834 | if (inode->i_pipe) { | ||
835 | ret = 0; | ||
836 | inode->i_pipe->readers++; | ||
837 | } | ||
838 | |||
839 | mutex_unlock(&inode->i_mutex); | ||
840 | |||
841 | return ret; | ||
842 | } | ||
843 | |||
844 | static int | ||
845 | pipe_write_open(struct inode *inode, struct file *filp) | ||
846 | { | ||
847 | int ret = -ENOENT; | ||
848 | |||
849 | mutex_lock(&inode->i_mutex); | ||
850 | |||
851 | if (inode->i_pipe) { | ||
852 | ret = 0; | ||
853 | inode->i_pipe->writers++; | ||
854 | } | ||
855 | |||
856 | mutex_unlock(&inode->i_mutex); | ||
857 | |||
858 | return ret; | ||
859 | } | ||
860 | |||
861 | static int | ||
862 | pipe_rdwr_open(struct inode *inode, struct file *filp) | ||
863 | { | ||
864 | int ret = -ENOENT; | ||
865 | |||
866 | if (!(filp->f_mode & (FMODE_READ|FMODE_WRITE))) | ||
867 | return -EINVAL; | ||
868 | |||
869 | mutex_lock(&inode->i_mutex); | ||
870 | |||
871 | if (inode->i_pipe) { | ||
872 | ret = 0; | ||
873 | if (filp->f_mode & FMODE_READ) | ||
874 | inode->i_pipe->readers++; | ||
875 | if (filp->f_mode & FMODE_WRITE) | ||
876 | inode->i_pipe->writers++; | ||
877 | } | ||
878 | |||
879 | mutex_unlock(&inode->i_mutex); | ||
880 | |||
881 | return ret; | ||
882 | } | ||
883 | |||
884 | /* | ||
885 | * The file_operations structs are not static because they | ||
886 | * are also used in linux/fs/fifo.c to do operations on FIFOs. | ||
887 | * | ||
888 | * Pipes reuse fifos' file_operations structs. | ||
889 | */ | ||
890 | const struct file_operations read_pipefifo_fops = { | ||
891 | .llseek = no_llseek, | ||
892 | .read = do_sync_read, | ||
893 | .aio_read = pipe_read, | ||
894 | .write = bad_pipe_w, | ||
895 | .poll = pipe_poll, | ||
896 | .unlocked_ioctl = pipe_ioctl, | ||
897 | .open = pipe_read_open, | ||
898 | .release = pipe_read_release, | ||
899 | .fasync = pipe_read_fasync, | ||
900 | }; | ||
901 | |||
902 | const struct file_operations write_pipefifo_fops = { | ||
903 | .llseek = no_llseek, | ||
904 | .read = bad_pipe_r, | ||
905 | .write = do_sync_write, | ||
906 | .aio_write = pipe_write, | ||
907 | .poll = pipe_poll, | ||
908 | .unlocked_ioctl = pipe_ioctl, | ||
909 | .open = pipe_write_open, | ||
910 | .release = pipe_write_release, | ||
911 | .fasync = pipe_write_fasync, | ||
912 | }; | ||
913 | |||
914 | const struct file_operations rdwr_pipefifo_fops = { | ||
915 | .llseek = no_llseek, | ||
916 | .read = do_sync_read, | ||
917 | .aio_read = pipe_read, | ||
918 | .write = do_sync_write, | ||
919 | .aio_write = pipe_write, | ||
920 | .poll = pipe_poll, | ||
921 | .unlocked_ioctl = pipe_ioctl, | ||
922 | .open = pipe_rdwr_open, | ||
923 | .release = pipe_rdwr_release, | ||
924 | .fasync = pipe_rdwr_fasync, | ||
925 | }; | ||
926 | |||
927 | struct pipe_inode_info * alloc_pipe_info(struct inode *inode) | ||
928 | { | 779 | { |
929 | struct pipe_inode_info *pipe; | 780 | struct pipe_inode_info *pipe; |
930 | 781 | ||
@@ -934,8 +785,8 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode) | |||
934 | if (pipe->bufs) { | 785 | if (pipe->bufs) { |
935 | init_waitqueue_head(&pipe->wait); | 786 | init_waitqueue_head(&pipe->wait); |
936 | pipe->r_counter = pipe->w_counter = 1; | 787 | pipe->r_counter = pipe->w_counter = 1; |
937 | pipe->inode = inode; | ||
938 | pipe->buffers = PIPE_DEF_BUFFERS; | 788 | pipe->buffers = PIPE_DEF_BUFFERS; |
789 | mutex_init(&pipe->mutex); | ||
939 | return pipe; | 790 | return pipe; |
940 | } | 791 | } |
941 | kfree(pipe); | 792 | kfree(pipe); |
@@ -944,7 +795,7 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode) | |||
944 | return NULL; | 795 | return NULL; |
945 | } | 796 | } |
946 | 797 | ||
947 | void __free_pipe_info(struct pipe_inode_info *pipe) | 798 | void free_pipe_info(struct pipe_inode_info *pipe) |
948 | { | 799 | { |
949 | int i; | 800 | int i; |
950 | 801 | ||
@@ -959,12 +810,6 @@ void __free_pipe_info(struct pipe_inode_info *pipe) | |||
959 | kfree(pipe); | 810 | kfree(pipe); |
960 | } | 811 | } |
961 | 812 | ||
962 | void free_pipe_info(struct inode *inode) | ||
963 | { | ||
964 | __free_pipe_info(inode->i_pipe); | ||
965 | inode->i_pipe = NULL; | ||
966 | } | ||
967 | |||
968 | static struct vfsmount *pipe_mnt __read_mostly; | 813 | static struct vfsmount *pipe_mnt __read_mostly; |
969 | 814 | ||
970 | /* | 815 | /* |
@@ -990,13 +835,14 @@ static struct inode * get_pipe_inode(void) | |||
990 | 835 | ||
991 | inode->i_ino = get_next_ino(); | 836 | inode->i_ino = get_next_ino(); |
992 | 837 | ||
993 | pipe = alloc_pipe_info(inode); | 838 | pipe = alloc_pipe_info(); |
994 | if (!pipe) | 839 | if (!pipe) |
995 | goto fail_iput; | 840 | goto fail_iput; |
996 | inode->i_pipe = pipe; | ||
997 | 841 | ||
842 | inode->i_pipe = pipe; | ||
843 | pipe->files = 2; | ||
998 | pipe->readers = pipe->writers = 1; | 844 | pipe->readers = pipe->writers = 1; |
999 | inode->i_fop = &rdwr_pipefifo_fops; | 845 | inode->i_fop = &pipefifo_fops; |
1000 | 846 | ||
1001 | /* | 847 | /* |
1002 | * Mark the inode dirty from the very beginning, | 848 | * Mark the inode dirty from the very beginning, |
@@ -1039,17 +885,19 @@ int create_pipe_files(struct file **res, int flags) | |||
1039 | d_instantiate(path.dentry, inode); | 885 | d_instantiate(path.dentry, inode); |
1040 | 886 | ||
1041 | err = -ENFILE; | 887 | err = -ENFILE; |
1042 | f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); | 888 | f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); |
1043 | if (IS_ERR(f)) | 889 | if (IS_ERR(f)) |
1044 | goto err_dentry; | 890 | goto err_dentry; |
1045 | 891 | ||
1046 | f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); | 892 | f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); |
893 | f->private_data = inode->i_pipe; | ||
1047 | 894 | ||
1048 | res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops); | 895 | res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); |
1049 | if (IS_ERR(res[0])) | 896 | if (IS_ERR(res[0])) |
1050 | goto err_file; | 897 | goto err_file; |
1051 | 898 | ||
1052 | path_get(&path); | 899 | path_get(&path); |
900 | res[0]->private_data = inode->i_pipe; | ||
1053 | res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); | 901 | res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); |
1054 | res[1] = f; | 902 | res[1] = f; |
1055 | return 0; | 903 | return 0; |
@@ -1057,12 +905,12 @@ int create_pipe_files(struct file **res, int flags) | |||
1057 | err_file: | 905 | err_file: |
1058 | put_filp(f); | 906 | put_filp(f); |
1059 | err_dentry: | 907 | err_dentry: |
1060 | free_pipe_info(inode); | 908 | free_pipe_info(inode->i_pipe); |
1061 | path_put(&path); | 909 | path_put(&path); |
1062 | return err; | 910 | return err; |
1063 | 911 | ||
1064 | err_inode: | 912 | err_inode: |
1065 | free_pipe_info(inode); | 913 | free_pipe_info(inode->i_pipe); |
1066 | iput(inode); | 914 | iput(inode); |
1067 | return err; | 915 | return err; |
1068 | } | 916 | } |
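Note on the create_pipe_files() hunks above: both struct files now cache inode->i_pipe in ->private_data (new lines 893 and 900), which matters because the pipe itself no longer keeps a back-pointer to the inode (the old pipe->inode assignment is dropped in alloc_pipe_info()). A minimal sketch of the lookup this enables; the helper name pipe_of is made up, the real f_op methods presumably just open-code the dereference:

static inline struct pipe_inode_info *pipe_of(struct file *filp)
{
	/* valid for files set up by create_pipe_files()/fifo_open(),
	   which store the pipe in ->private_data */
	return filp->private_data;
}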
@@ -1144,6 +992,168 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) | |||
1144 | return sys_pipe2(fildes, 0); | 992 | return sys_pipe2(fildes, 0); |
1145 | } | 993 | } |
1146 | 994 | ||
995 | static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) | ||
996 | { | ||
997 | int cur = *cnt; | ||
998 | |||
999 | while (cur == *cnt) { | ||
1000 | pipe_wait(pipe); | ||
1001 | if (signal_pending(current)) | ||
1002 | break; | ||
1003 | } | ||
1004 | return cur == *cnt ? -ERESTARTSYS : 0; | ||
1005 | } | ||
1006 | |||
1007 | static void wake_up_partner(struct pipe_inode_info *pipe) | ||
1008 | { | ||
1009 | wake_up_interruptible(&pipe->wait); | ||
1010 | } | ||
1011 | |||
1012 | static int fifo_open(struct inode *inode, struct file *filp) | ||
1013 | { | ||
1014 | struct pipe_inode_info *pipe; | ||
1015 | bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; | ||
1016 | int kill = 0; | ||
1017 | int ret; | ||
1018 | |||
1019 | filp->f_version = 0; | ||
1020 | |||
1021 | spin_lock(&inode->i_lock); | ||
1022 | if (inode->i_pipe) { | ||
1023 | pipe = inode->i_pipe; | ||
1024 | pipe->files++; | ||
1025 | spin_unlock(&inode->i_lock); | ||
1026 | } else { | ||
1027 | spin_unlock(&inode->i_lock); | ||
1028 | pipe = alloc_pipe_info(); | ||
1029 | if (!pipe) | ||
1030 | return -ENOMEM; | ||
1031 | pipe->files = 1; | ||
1032 | spin_lock(&inode->i_lock); | ||
1033 | if (unlikely(inode->i_pipe)) { | ||
1034 | inode->i_pipe->files++; | ||
1035 | spin_unlock(&inode->i_lock); | ||
1036 | free_pipe_info(pipe); | ||
1037 | pipe = inode->i_pipe; | ||
1038 | } else { | ||
1039 | inode->i_pipe = pipe; | ||
1040 | spin_unlock(&inode->i_lock); | ||
1041 | } | ||
1042 | } | ||
1043 | filp->private_data = pipe; | ||
1044 | /* OK, we have a pipe and it's pinned down */ | ||
1045 | |||
1046 | __pipe_lock(pipe); | ||
1047 | |||
1048 | /* We can only do regular read/write on fifos */ | ||
1049 | filp->f_mode &= (FMODE_READ | FMODE_WRITE); | ||
1050 | |||
1051 | switch (filp->f_mode) { | ||
1052 | case FMODE_READ: | ||
1053 | /* | ||
1054 | * O_RDONLY | ||
1055 | * POSIX.1 says that O_NONBLOCK means return with the FIFO | ||
1056 | * opened, even when there is no process writing the FIFO. | ||
1057 | */ | ||
1058 | pipe->r_counter++; | ||
1059 | if (pipe->readers++ == 0) | ||
1060 | wake_up_partner(pipe); | ||
1061 | |||
1062 | if (!is_pipe && !pipe->writers) { | ||
1063 | if ((filp->f_flags & O_NONBLOCK)) { | ||
1064 | /* suppress POLLHUP until we have | ||
1065 | * seen a writer */ | ||
1066 | filp->f_version = pipe->w_counter; | ||
1067 | } else { | ||
1068 | if (wait_for_partner(pipe, &pipe->w_counter)) | ||
1069 | goto err_rd; | ||
1070 | } | ||
1071 | } | ||
1072 | break; | ||
1073 | |||
1074 | case FMODE_WRITE: | ||
1075 | /* | ||
1076 | * O_WRONLY | ||
1077 | * POSIX.1 says that O_NONBLOCK means return -1 with | ||
1078 | * errno=ENXIO when there is no process reading the FIFO. | ||
1079 | */ | ||
1080 | ret = -ENXIO; | ||
1081 | if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers) | ||
1082 | goto err; | ||
1083 | |||
1084 | pipe->w_counter++; | ||
1085 | if (!pipe->writers++) | ||
1086 | wake_up_partner(pipe); | ||
1087 | |||
1088 | if (!is_pipe && !pipe->readers) { | ||
1089 | if (wait_for_partner(pipe, &pipe->r_counter)) | ||
1090 | goto err_wr; | ||
1091 | } | ||
1092 | break; | ||
1093 | |||
1094 | case FMODE_READ | FMODE_WRITE: | ||
1095 | /* | ||
1096 | * O_RDWR | ||
1097 | * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. | ||
1098 | * This implementation will NEVER block on a O_RDWR open, since | ||
1099 | * the process can at least talk to itself. | ||
1100 | */ | ||
1101 | |||
1102 | pipe->readers++; | ||
1103 | pipe->writers++; | ||
1104 | pipe->r_counter++; | ||
1105 | pipe->w_counter++; | ||
1106 | if (pipe->readers == 1 || pipe->writers == 1) | ||
1107 | wake_up_partner(pipe); | ||
1108 | break; | ||
1109 | |||
1110 | default: | ||
1111 | ret = -EINVAL; | ||
1112 | goto err; | ||
1113 | } | ||
1114 | |||
1115 | /* Ok! */ | ||
1116 | __pipe_unlock(pipe); | ||
1117 | return 0; | ||
1118 | |||
1119 | err_rd: | ||
1120 | if (!--pipe->readers) | ||
1121 | wake_up_interruptible(&pipe->wait); | ||
1122 | ret = -ERESTARTSYS; | ||
1123 | goto err; | ||
1124 | |||
1125 | err_wr: | ||
1126 | if (!--pipe->writers) | ||
1127 | wake_up_interruptible(&pipe->wait); | ||
1128 | ret = -ERESTARTSYS; | ||
1129 | goto err; | ||
1130 | |||
1131 | err: | ||
1132 | spin_lock(&inode->i_lock); | ||
1133 | if (!--pipe->files) { | ||
1134 | inode->i_pipe = NULL; | ||
1135 | kill = 1; | ||
1136 | } | ||
1137 | spin_unlock(&inode->i_lock); | ||
1138 | __pipe_unlock(pipe); | ||
1139 | if (kill) | ||
1140 | free_pipe_info(pipe); | ||
1141 | return ret; | ||
1142 | } | ||
1143 | |||
1144 | const struct file_operations pipefifo_fops = { | ||
1145 | .open = fifo_open, | ||
1146 | .llseek = no_llseek, | ||
1147 | .read = do_sync_read, | ||
1148 | .aio_read = pipe_read, | ||
1149 | .write = do_sync_write, | ||
1150 | .aio_write = pipe_write, | ||
1151 | .poll = pipe_poll, | ||
1152 | .unlocked_ioctl = pipe_ioctl, | ||
1153 | .release = pipe_release, | ||
1154 | .fasync = pipe_fasync, | ||
1155 | }; | ||
1156 | |||
1147 | /* | 1157 | /* |
1148 | * Allocate a new array of pipe buffers and copy the info over. Returns the | 1158 | * Allocate a new array of pipe buffers and copy the info over. Returns the |
1149 | * pipe size if successful, or return -ERROR on error. | 1159 | * pipe size if successful, or return -ERROR on error. |
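The comments inside the new fifo_open() above spell out the POSIX open rules it implements. As an illustration only, a small userspace check of those rules could look like this (the FIFO path is made up, the FIFO is assumed to exist already, and no other process is assumed to have it open):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/tmp/testfifo";	/* created beforehand with mkfifo(1) */
	int fd;

	/* O_WRONLY|O_NONBLOCK with no reader: open() fails with ENXIO */
	if (open(path, O_WRONLY | O_NONBLOCK) < 0)
		printf("write end, non-blocking: %s\n", strerror(errno));

	/* O_RDONLY|O_NONBLOCK: succeeds even though no writer exists yet */
	fd = open(path, O_RDONLY | O_NONBLOCK);
	printf("read end, non-blocking: fd=%d\n", fd);
	if (fd >= 0)
		close(fd);

	/* O_RDWR never blocks: the opener counts as both reader and writer */
	fd = open(path, O_RDWR);
	printf("read-write end: fd=%d\n", fd);
	return 0;
}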
@@ -1229,9 +1239,7 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, | |||
1229 | */ | 1239 | */ |
1230 | struct pipe_inode_info *get_pipe_info(struct file *file) | 1240 | struct pipe_inode_info *get_pipe_info(struct file *file) |
1231 | { | 1241 | { |
1232 | struct inode *i = file_inode(file); | 1242 | return file->f_op == &pipefifo_fops ? file->private_data : NULL; |
1233 | |||
1234 | return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL; | ||
1235 | } | 1243 | } |
1236 | 1244 | ||
1237 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | 1245 | long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) |
@@ -1243,7 +1251,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | |||
1243 | if (!pipe) | 1251 | if (!pipe) |
1244 | return -EBADF; | 1252 | return -EBADF; |
1245 | 1253 | ||
1246 | mutex_lock(&pipe->inode->i_mutex); | 1254 | __pipe_lock(pipe); |
1247 | 1255 | ||
1248 | switch (cmd) { | 1256 | switch (cmd) { |
1249 | case F_SETPIPE_SZ: { | 1257 | case F_SETPIPE_SZ: { |
@@ -1272,7 +1280,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) | |||
1272 | } | 1280 | } |
1273 | 1281 | ||
1274 | out: | 1282 | out: |
1275 | mutex_unlock(&pipe->inode->i_mutex); | 1283 | __pipe_unlock(pipe); |
1276 | return ret; | 1284 | return ret; |
1277 | } | 1285 | } |
1278 | 1286 | ||
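Throughout fs/pipe.c the locking switches from inode->i_mutex to a per-pipe lock: alloc_pipe_info() now does mutex_init(&pipe->mutex), and callers such as pipe_fcntl() take __pipe_lock()/__pipe_unlock() instead. The helpers themselves are not shown in these hunks; presumably they are thin wrappers along these lines (a sketch, not the actual definitions):

static void __pipe_lock(struct pipe_inode_info *pipe)
{
	mutex_lock(&pipe->mutex);
}

static void __pipe_unlock(struct pipe_inode_info *pipe)
{
	mutex_unlock(&pipe->mutex);
}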
diff --git a/fs/pnode.c b/fs/pnode.c index 8b29d2164da6..3d2a7141b87a 100644 --- a/fs/pnode.c +++ b/fs/pnode.c | |||
@@ -218,7 +218,7 @@ static struct mount *get_source(struct mount *dest, | |||
218 | * @source_mnt: source mount. | 218 | * @source_mnt: source mount. |
219 | * @tree_list : list of heads of trees to be attached. | 219 | * @tree_list : list of heads of trees to be attached. |
220 | */ | 220 | */ |
221 | int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, | 221 | int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, |
222 | struct mount *source_mnt, struct list_head *tree_list) | 222 | struct mount *source_mnt, struct list_head *tree_list) |
223 | { | 223 | { |
224 | struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; | 224 | struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; |
@@ -227,7 +227,6 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, | |||
227 | struct mount *prev_dest_mnt = dest_mnt; | 227 | struct mount *prev_dest_mnt = dest_mnt; |
228 | struct mount *prev_src_mnt = source_mnt; | 228 | struct mount *prev_src_mnt = source_mnt; |
229 | LIST_HEAD(tmp_list); | 229 | LIST_HEAD(tmp_list); |
230 | LIST_HEAD(umount_list); | ||
231 | 230 | ||
232 | for (m = propagation_next(dest_mnt, dest_mnt); m; | 231 | for (m = propagation_next(dest_mnt, dest_mnt); m; |
233 | m = propagation_next(m, dest_mnt)) { | 232 | m = propagation_next(m, dest_mnt)) { |
@@ -250,8 +249,8 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, | |||
250 | goto out; | 249 | goto out; |
251 | } | 250 | } |
252 | 251 | ||
253 | if (is_subdir(dest_dentry, m->mnt.mnt_root)) { | 252 | if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { |
254 | mnt_set_mountpoint(m, dest_dentry, child); | 253 | mnt_set_mountpoint(m, dest_mp, child); |
255 | list_add_tail(&child->mnt_hash, tree_list); | 254 | list_add_tail(&child->mnt_hash, tree_list); |
256 | } else { | 255 | } else { |
257 | /* | 256 | /* |
@@ -267,10 +266,9 @@ out: | |||
267 | br_write_lock(&vfsmount_lock); | 266 | br_write_lock(&vfsmount_lock); |
268 | while (!list_empty(&tmp_list)) { | 267 | while (!list_empty(&tmp_list)) { |
269 | child = list_first_entry(&tmp_list, struct mount, mnt_hash); | 268 | child = list_first_entry(&tmp_list, struct mount, mnt_hash); |
270 | umount_tree(child, 0, &umount_list); | 269 | umount_tree(child, 0); |
271 | } | 270 | } |
272 | br_write_unlock(&vfsmount_lock); | 271 | br_write_unlock(&vfsmount_lock); |
273 | release_mounts(&umount_list); | ||
274 | return ret; | 272 | return ret; |
275 | } | 273 | } |
276 | 274 | ||
diff --git a/fs/pnode.h b/fs/pnode.h index a0493d5ebfbf..b091445c1c4a 100644 --- a/fs/pnode.h +++ b/fs/pnode.h | |||
@@ -32,17 +32,16 @@ static inline void set_mnt_shared(struct mount *mnt) | |||
32 | } | 32 | } |
33 | 33 | ||
34 | void change_mnt_propagation(struct mount *, int); | 34 | void change_mnt_propagation(struct mount *, int); |
35 | int propagate_mnt(struct mount *, struct dentry *, struct mount *, | 35 | int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, |
36 | struct list_head *); | 36 | struct list_head *); |
37 | int propagate_umount(struct list_head *); | 37 | int propagate_umount(struct list_head *); |
38 | int propagate_mount_busy(struct mount *, int); | 38 | int propagate_mount_busy(struct mount *, int); |
39 | void mnt_release_group_id(struct mount *); | 39 | void mnt_release_group_id(struct mount *); |
40 | int get_dominating_id(struct mount *mnt, const struct path *root); | 40 | int get_dominating_id(struct mount *mnt, const struct path *root); |
41 | unsigned int mnt_get_count(struct mount *mnt); | 41 | unsigned int mnt_get_count(struct mount *mnt); |
42 | void mnt_set_mountpoint(struct mount *, struct dentry *, | 42 | void mnt_set_mountpoint(struct mount *, struct mountpoint *, |
43 | struct mount *); | 43 | struct mount *); |
44 | void release_mounts(struct list_head *); | 44 | void umount_tree(struct mount *, int); |
45 | void umount_tree(struct mount *, int, struct list_head *); | ||
46 | struct mount *copy_tree(struct mount *, struct dentry *, int); | 45 | struct mount *copy_tree(struct mount *, struct dentry *, int); |
47 | bool is_path_reachable(struct mount *, struct dentry *, | 46 | bool is_path_reachable(struct mount *, struct dentry *, |
48 | const struct path *root); | 47 | const struct path *root); |
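propagate_mnt() and mnt_set_mountpoint() now take a struct mountpoint rather than a bare dentry. The hunks above only show that it carries the mounted-on dentry (dest_mp->m_dentry); purely as an assumption, its shape would be something like:

struct mountpoint {
	struct hlist_node m_hash;	/* lookup by dentry (assumed) */
	struct dentry *m_dentry;	/* the mounted-on dentry, used above */
	int m_count;			/* mounts attached here (assumed) */
};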
diff --git a/fs/proc/base.c b/fs/proc/base.c index 3861bcec41ff..dd51e50001fe 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -405,6 +405,37 @@ static const struct file_operations proc_lstats_operations = { | |||
405 | 405 | ||
406 | #endif | 406 | #endif |
407 | 407 | ||
408 | #ifdef CONFIG_CGROUPS | ||
409 | static int cgroup_open(struct inode *inode, struct file *file) | ||
410 | { | ||
411 | struct pid *pid = PROC_I(inode)->pid; | ||
412 | return single_open(file, proc_cgroup_show, pid); | ||
413 | } | ||
414 | |||
415 | static const struct file_operations proc_cgroup_operations = { | ||
416 | .open = cgroup_open, | ||
417 | .read = seq_read, | ||
418 | .llseek = seq_lseek, | ||
419 | .release = single_release, | ||
420 | }; | ||
421 | #endif | ||
422 | |||
423 | #ifdef CONFIG_PROC_PID_CPUSET | ||
424 | |||
425 | static int cpuset_open(struct inode *inode, struct file *file) | ||
426 | { | ||
427 | struct pid *pid = PROC_I(inode)->pid; | ||
428 | return single_open(file, proc_cpuset_show, pid); | ||
429 | } | ||
430 | |||
431 | static const struct file_operations proc_cpuset_operations = { | ||
432 | .open = cpuset_open, | ||
433 | .read = seq_read, | ||
434 | .llseek = seq_lseek, | ||
435 | .release = single_release, | ||
436 | }; | ||
437 | #endif | ||
438 | |||
408 | static int proc_oom_score(struct task_struct *task, char *buffer) | 439 | static int proc_oom_score(struct task_struct *task, char *buffer) |
409 | { | 440 | { |
410 | unsigned long totalpages = totalram_pages + total_swap_pages; | 441 | unsigned long totalpages = totalram_pages + total_swap_pages; |
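The cgroup_open()/cpuset_open() additions above follow the standard single_open()/seq_file pattern for one-shot per-task files. For illustration, the same shape with hypothetical names (example_show and proc_example_operations are not part of the patch):

static int example_show(struct seq_file *m, void *v)
{
	struct pid *pid = m->private;	/* the pointer handed to single_open() */

	seq_printf(m, "pid %d\n", pid_nr(pid));
	return 0;
}

static int example_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_show, PROC_I(inode)->pid);
}

static const struct file_operations proc_example_operations = {
	.open		= example_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};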
@@ -1621,6 +1652,15 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags) | |||
1621 | return 0; | 1652 | return 0; |
1622 | } | 1653 | } |
1623 | 1654 | ||
1655 | int pid_delete_dentry(const struct dentry *dentry) | ||
1656 | { | ||
1657 | /* Is the task we represent dead? | ||
1658 | * If so, then don't put the dentry on the lru list, | ||
1659 | * kill it immediately. | ||
1660 | */ | ||
1661 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
1662 | } | ||
1663 | |||
1624 | const struct dentry_operations pid_dentry_operations = | 1664 | const struct dentry_operations pid_dentry_operations = |
1625 | { | 1665 | { |
1626 | .d_revalidate = pid_revalidate, | 1666 | .d_revalidate = pid_revalidate, |
@@ -2893,7 +2933,7 @@ retry: | |||
2893 | return iter; | 2933 | return iter; |
2894 | } | 2934 | } |
2895 | 2935 | ||
2896 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY) | 2936 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) |
2897 | 2937 | ||
2898 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 2938 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
2899 | struct tgid_iter iter) | 2939 | struct tgid_iter iter) |
@@ -2916,13 +2956,21 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
2916 | struct tgid_iter iter; | 2956 | struct tgid_iter iter; |
2917 | struct pid_namespace *ns; | 2957 | struct pid_namespace *ns; |
2918 | filldir_t __filldir; | 2958 | filldir_t __filldir; |
2959 | loff_t pos = filp->f_pos; | ||
2919 | 2960 | ||
2920 | if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) | 2961 | if (pos >= PID_MAX_LIMIT + TGID_OFFSET) |
2921 | goto out; | 2962 | goto out; |
2922 | 2963 | ||
2923 | ns = filp->f_dentry->d_sb->s_fs_info; | 2964 | if (pos == TGID_OFFSET - 1) { |
2965 | if (proc_fill_cache(filp, dirent, filldir, "self", 4, | ||
2966 | NULL, NULL, NULL) < 0) | ||
2967 | goto out; | ||
2968 | iter.tgid = 0; | ||
2969 | } else { | ||
2970 | iter.tgid = pos - TGID_OFFSET; | ||
2971 | } | ||
2924 | iter.task = NULL; | 2972 | iter.task = NULL; |
2925 | iter.tgid = filp->f_pos - TGID_OFFSET; | 2973 | ns = filp->f_dentry->d_sb->s_fs_info; |
2926 | for (iter = next_tgid(ns, iter); | 2974 | for (iter = next_tgid(ns, iter); |
2927 | iter.task; | 2975 | iter.task; |
2928 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 2976 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
diff --git a/fs/proc/fd.h b/fs/proc/fd.h index cbb1d47deda8..7c047f256ae2 100644 --- a/fs/proc/fd.h +++ b/fs/proc/fd.h | |||
@@ -11,4 +11,9 @@ extern const struct inode_operations proc_fdinfo_inode_operations; | |||
11 | 11 | ||
12 | extern int proc_fd_permission(struct inode *inode, int mask); | 12 | extern int proc_fd_permission(struct inode *inode, int mask); |
13 | 13 | ||
14 | static inline int proc_fd(struct inode *inode) | ||
15 | { | ||
16 | return PROC_I(inode)->fd; | ||
17 | } | ||
18 | |||
14 | #endif /* __PROCFS_FD_H__ */ | 19 | #endif /* __PROCFS_FD_H__ */ |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 21e1a8f1659d..a2596afffae6 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -36,212 +36,6 @@ static int proc_match(unsigned int len, const char *name, struct proc_dir_entry | |||
36 | return !memcmp(name, de->name, len); | 36 | return !memcmp(name, de->name, len); |
37 | } | 37 | } |
38 | 38 | ||
39 | /* buffer size is one page but our output routines use some slack for overruns */ | ||
40 | #define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) | ||
41 | |||
42 | static ssize_t | ||
43 | __proc_file_read(struct file *file, char __user *buf, size_t nbytes, | ||
44 | loff_t *ppos) | ||
45 | { | ||
46 | struct inode * inode = file_inode(file); | ||
47 | char *page; | ||
48 | ssize_t retval=0; | ||
49 | int eof=0; | ||
50 | ssize_t n, count; | ||
51 | char *start; | ||
52 | struct proc_dir_entry * dp; | ||
53 | unsigned long long pos; | ||
54 | |||
55 | /* | ||
56 | * Gaah, please just use "seq_file" instead. The legacy /proc | ||
57 | * interfaces cut loff_t down to off_t for reads, and ignore | ||
58 | * the offset entirely for writes.. | ||
59 | */ | ||
60 | pos = *ppos; | ||
61 | if (pos > MAX_NON_LFS) | ||
62 | return 0; | ||
63 | if (nbytes > MAX_NON_LFS - pos) | ||
64 | nbytes = MAX_NON_LFS - pos; | ||
65 | |||
66 | dp = PDE(inode); | ||
67 | if (!(page = (char*) __get_free_page(GFP_TEMPORARY))) | ||
68 | return -ENOMEM; | ||
69 | |||
70 | while ((nbytes > 0) && !eof) { | ||
71 | count = min_t(size_t, PROC_BLOCK_SIZE, nbytes); | ||
72 | |||
73 | start = NULL; | ||
74 | if (dp->read_proc) { | ||
75 | /* | ||
76 | * How to be a proc read function | ||
77 | * ------------------------------ | ||
78 | * Prototype: | ||
79 | * int f(char *buffer, char **start, off_t offset, | ||
80 | * int count, int *peof, void *dat) | ||
81 | * | ||
82 | * Assume that the buffer is "count" bytes in size. | ||
83 | * | ||
84 | * If you know you have supplied all the data you | ||
85 | * have, set *peof. | ||
86 | * | ||
87 | * You have three ways to return data: | ||
88 | * 0) Leave *start = NULL. (This is the default.) | ||
89 | * Put the data of the requested offset at that | ||
90 | * offset within the buffer. Return the number (n) | ||
91 | * of bytes there are from the beginning of the | ||
92 | * buffer up to the last byte of data. If the | ||
93 | * number of supplied bytes (= n - offset) is | ||
94 | * greater than zero and you didn't signal eof | ||
95 | * and the reader is prepared to take more data | ||
96 | * you will be called again with the requested | ||
97 | * offset advanced by the number of bytes | ||
98 | * absorbed. This interface is useful for files | ||
99 | * no larger than the buffer. | ||
100 | * 1) Set *start = an unsigned long value less than | ||
101 | * the buffer address but greater than zero. | ||
102 | * Put the data of the requested offset at the | ||
103 | * beginning of the buffer. Return the number of | ||
104 | * bytes of data placed there. If this number is | ||
105 | * greater than zero and you didn't signal eof | ||
106 | * and the reader is prepared to take more data | ||
107 | * you will be called again with the requested | ||
108 | * offset advanced by *start. This interface is | ||
109 | * useful when you have a large file consisting | ||
110 | * of a series of blocks which you want to count | ||
111 | * and return as wholes. | ||
112 | * (Hack by Paul.Russell@rustcorp.com.au) | ||
113 | * 2) Set *start = an address within the buffer. | ||
114 | * Put the data of the requested offset at *start. | ||
115 | * Return the number of bytes of data placed there. | ||
116 | * If this number is greater than zero and you | ||
117 | * didn't signal eof and the reader is prepared to | ||
118 | * take more data you will be called again with the | ||
119 | * requested offset advanced by the number of bytes | ||
120 | * absorbed. | ||
121 | */ | ||
122 | n = dp->read_proc(page, &start, *ppos, | ||
123 | count, &eof, dp->data); | ||
124 | } else | ||
125 | break; | ||
126 | |||
127 | if (n == 0) /* end of file */ | ||
128 | break; | ||
129 | if (n < 0) { /* error */ | ||
130 | if (retval == 0) | ||
131 | retval = n; | ||
132 | break; | ||
133 | } | ||
134 | |||
135 | if (start == NULL) { | ||
136 | if (n > PAGE_SIZE) /* Apparent buffer overflow */ | ||
137 | n = PAGE_SIZE; | ||
138 | n -= *ppos; | ||
139 | if (n <= 0) | ||
140 | break; | ||
141 | if (n > count) | ||
142 | n = count; | ||
143 | start = page + *ppos; | ||
144 | } else if (start < page) { | ||
145 | if (n > PAGE_SIZE) /* Apparent buffer overflow */ | ||
146 | n = PAGE_SIZE; | ||
147 | if (n > count) { | ||
148 | /* | ||
149 | * Don't reduce n because doing so might | ||
150 | * cut off part of a data block. | ||
151 | */ | ||
152 | pr_warn("proc_file_read: count exceeded\n"); | ||
153 | } | ||
154 | } else /* start >= page */ { | ||
155 | unsigned long startoff = (unsigned long)(start - page); | ||
156 | if (n > (PAGE_SIZE - startoff)) /* buffer overflow? */ | ||
157 | n = PAGE_SIZE - startoff; | ||
158 | if (n > count) | ||
159 | n = count; | ||
160 | } | ||
161 | |||
162 | n -= copy_to_user(buf, start < page ? page : start, n); | ||
163 | if (n == 0) { | ||
164 | if (retval == 0) | ||
165 | retval = -EFAULT; | ||
166 | break; | ||
167 | } | ||
168 | |||
169 | *ppos += start < page ? (unsigned long)start : n; | ||
170 | nbytes -= n; | ||
171 | buf += n; | ||
172 | retval += n; | ||
173 | } | ||
174 | free_page((unsigned long) page); | ||
175 | return retval; | ||
176 | } | ||
177 | |||
178 | static ssize_t | ||
179 | proc_file_read(struct file *file, char __user *buf, size_t nbytes, | ||
180 | loff_t *ppos) | ||
181 | { | ||
182 | struct proc_dir_entry *pde = PDE(file_inode(file)); | ||
183 | ssize_t rv = -EIO; | ||
184 | |||
185 | spin_lock(&pde->pde_unload_lock); | ||
186 | if (!pde->proc_fops) { | ||
187 | spin_unlock(&pde->pde_unload_lock); | ||
188 | return rv; | ||
189 | } | ||
190 | pde->pde_users++; | ||
191 | spin_unlock(&pde->pde_unload_lock); | ||
192 | |||
193 | rv = __proc_file_read(file, buf, nbytes, ppos); | ||
194 | |||
195 | pde_users_dec(pde); | ||
196 | return rv; | ||
197 | } | ||
198 | |||
199 | static ssize_t | ||
200 | proc_file_write(struct file *file, const char __user *buffer, | ||
201 | size_t count, loff_t *ppos) | ||
202 | { | ||
203 | struct proc_dir_entry *pde = PDE(file_inode(file)); | ||
204 | ssize_t rv = -EIO; | ||
205 | |||
206 | if (pde->write_proc) { | ||
207 | spin_lock(&pde->pde_unload_lock); | ||
208 | if (!pde->proc_fops) { | ||
209 | spin_unlock(&pde->pde_unload_lock); | ||
210 | return rv; | ||
211 | } | ||
212 | pde->pde_users++; | ||
213 | spin_unlock(&pde->pde_unload_lock); | ||
214 | |||
215 | /* FIXME: does this routine need ppos? probably... */ | ||
216 | rv = pde->write_proc(file, buffer, count, pde->data); | ||
217 | pde_users_dec(pde); | ||
218 | } | ||
219 | return rv; | ||
220 | } | ||
221 | |||
222 | |||
223 | static loff_t | ||
224 | proc_file_lseek(struct file *file, loff_t offset, int orig) | ||
225 | { | ||
226 | loff_t retval = -EINVAL; | ||
227 | switch (orig) { | ||
228 | case 1: | ||
229 | offset += file->f_pos; | ||
230 | /* fallthrough */ | ||
231 | case 0: | ||
232 | if (offset < 0 || offset > MAX_NON_LFS) | ||
233 | break; | ||
234 | file->f_pos = retval = offset; | ||
235 | } | ||
236 | return retval; | ||
237 | } | ||
238 | |||
239 | static const struct file_operations proc_file_operations = { | ||
240 | .llseek = proc_file_lseek, | ||
241 | .read = proc_file_read, | ||
242 | .write = proc_file_write, | ||
243 | }; | ||
244 | |||
245 | static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) | 39 | static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) |
246 | { | 40 | { |
247 | struct inode *inode = dentry->d_inode; | 41 | struct inode *inode = dentry->d_inode; |
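The long comment removed above documented the legacy read_proc protocol. For reference, the simplest handler that protocol allowed (case 0: data at the start of the buffer, EOF signalled because everything fits in one page) looked roughly like this; foo_read_proc is an illustrative name, not code from the tree:

static int foo_read_proc(char *page, char **start, off_t off,
			 int count, int *eof, void *data)
{
	/* write everything at the start of the buffer and signal EOF;
	   __proc_file_read() handled the offset/count bookkeeping */
	int len = sprintf(page, "hello from foo\n");

	*eof = 1;
	return len;
}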
@@ -371,7 +165,7 @@ void proc_free_inum(unsigned int inum) | |||
371 | 165 | ||
372 | static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) | 166 | static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) |
373 | { | 167 | { |
374 | nd_set_link(nd, PDE(dentry->d_inode)->data); | 168 | nd_set_link(nd, __PDE_DATA(dentry->d_inode)); |
375 | return NULL; | 169 | return NULL; |
376 | } | 170 | } |
377 | 171 | ||
@@ -541,19 +335,17 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
541 | return ret; | 335 | return ret; |
542 | 336 | ||
543 | if (S_ISDIR(dp->mode)) { | 337 | if (S_ISDIR(dp->mode)) { |
544 | if (dp->proc_iops == NULL) { | 338 | dp->proc_fops = &proc_dir_operations; |
545 | dp->proc_fops = &proc_dir_operations; | 339 | dp->proc_iops = &proc_dir_inode_operations; |
546 | dp->proc_iops = &proc_dir_inode_operations; | ||
547 | } | ||
548 | dir->nlink++; | 340 | dir->nlink++; |
549 | } else if (S_ISLNK(dp->mode)) { | 341 | } else if (S_ISLNK(dp->mode)) { |
550 | if (dp->proc_iops == NULL) | 342 | dp->proc_iops = &proc_link_inode_operations; |
551 | dp->proc_iops = &proc_link_inode_operations; | ||
552 | } else if (S_ISREG(dp->mode)) { | 343 | } else if (S_ISREG(dp->mode)) { |
553 | if (dp->proc_fops == NULL) | 344 | BUG_ON(dp->proc_fops == NULL); |
554 | dp->proc_fops = &proc_file_operations; | 345 | dp->proc_iops = &proc_file_inode_operations; |
555 | if (dp->proc_iops == NULL) | 346 | } else { |
556 | dp->proc_iops = &proc_file_inode_operations; | 347 | WARN_ON(1); |
348 | return -EINVAL; | ||
557 | } | 349 | } |
558 | 350 | ||
559 | spin_lock(&proc_subdir_lock); | 351 | spin_lock(&proc_subdir_lock); |
@@ -636,13 +428,17 @@ struct proc_dir_entry *proc_symlink(const char *name, | |||
636 | } | 428 | } |
637 | EXPORT_SYMBOL(proc_symlink); | 429 | EXPORT_SYMBOL(proc_symlink); |
638 | 430 | ||
639 | struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, | 431 | struct proc_dir_entry *proc_mkdir_data(const char *name, umode_t mode, |
640 | struct proc_dir_entry *parent) | 432 | struct proc_dir_entry *parent, void *data) |
641 | { | 433 | { |
642 | struct proc_dir_entry *ent; | 434 | struct proc_dir_entry *ent; |
643 | 435 | ||
436 | if (mode == 0) | ||
437 | mode = S_IRUGO | S_IXUGO; | ||
438 | |||
644 | ent = __proc_create(&parent, name, S_IFDIR | mode, 2); | 439 | ent = __proc_create(&parent, name, S_IFDIR | mode, 2); |
645 | if (ent) { | 440 | if (ent) { |
441 | ent->data = data; | ||
646 | if (proc_register(parent, ent) < 0) { | 442 | if (proc_register(parent, ent) < 0) { |
647 | kfree(ent); | 443 | kfree(ent); |
648 | ent = NULL; | 444 | ent = NULL; |
@@ -650,82 +446,39 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, | |||
650 | } | 446 | } |
651 | return ent; | 447 | return ent; |
652 | } | 448 | } |
653 | EXPORT_SYMBOL(proc_mkdir_mode); | 449 | EXPORT_SYMBOL_GPL(proc_mkdir_data); |
654 | 450 | ||
655 | struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name, | 451 | struct proc_dir_entry *proc_mkdir_mode(const char *name, umode_t mode, |
656 | struct proc_dir_entry *parent) | 452 | struct proc_dir_entry *parent) |
657 | { | 453 | { |
658 | struct proc_dir_entry *ent; | 454 | return proc_mkdir_data(name, mode, parent, NULL); |
659 | |||
660 | ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2); | ||
661 | if (ent) { | ||
662 | ent->data = net; | ||
663 | if (proc_register(parent, ent) < 0) { | ||
664 | kfree(ent); | ||
665 | ent = NULL; | ||
666 | } | ||
667 | } | ||
668 | return ent; | ||
669 | } | 455 | } |
670 | EXPORT_SYMBOL_GPL(proc_net_mkdir); | 456 | EXPORT_SYMBOL(proc_mkdir_mode); |
671 | 457 | ||
672 | struct proc_dir_entry *proc_mkdir(const char *name, | 458 | struct proc_dir_entry *proc_mkdir(const char *name, |
673 | struct proc_dir_entry *parent) | 459 | struct proc_dir_entry *parent) |
674 | { | 460 | { |
675 | return proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent); | 461 | return proc_mkdir_data(name, 0, parent, NULL); |
676 | } | 462 | } |
677 | EXPORT_SYMBOL(proc_mkdir); | 463 | EXPORT_SYMBOL(proc_mkdir); |
678 | 464 | ||
679 | struct proc_dir_entry *create_proc_entry(const char *name, umode_t mode, | ||
680 | struct proc_dir_entry *parent) | ||
681 | { | ||
682 | struct proc_dir_entry *ent; | ||
683 | nlink_t nlink; | ||
684 | |||
685 | if (S_ISDIR(mode)) { | ||
686 | if ((mode & S_IALLUGO) == 0) | ||
687 | mode |= S_IRUGO | S_IXUGO; | ||
688 | nlink = 2; | ||
689 | } else { | ||
690 | if ((mode & S_IFMT) == 0) | ||
691 | mode |= S_IFREG; | ||
692 | if ((mode & S_IALLUGO) == 0) | ||
693 | mode |= S_IRUGO; | ||
694 | nlink = 1; | ||
695 | } | ||
696 | |||
697 | ent = __proc_create(&parent, name, mode, nlink); | ||
698 | if (ent) { | ||
699 | if (proc_register(parent, ent) < 0) { | ||
700 | kfree(ent); | ||
701 | ent = NULL; | ||
702 | } | ||
703 | } | ||
704 | return ent; | ||
705 | } | ||
706 | EXPORT_SYMBOL(create_proc_entry); | ||
707 | |||
708 | struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, | 465 | struct proc_dir_entry *proc_create_data(const char *name, umode_t mode, |
709 | struct proc_dir_entry *parent, | 466 | struct proc_dir_entry *parent, |
710 | const struct file_operations *proc_fops, | 467 | const struct file_operations *proc_fops, |
711 | void *data) | 468 | void *data) |
712 | { | 469 | { |
713 | struct proc_dir_entry *pde; | 470 | struct proc_dir_entry *pde; |
714 | nlink_t nlink; | 471 | if ((mode & S_IFMT) == 0) |
472 | mode |= S_IFREG; | ||
715 | 473 | ||
716 | if (S_ISDIR(mode)) { | 474 | if (!S_ISREG(mode)) { |
717 | if ((mode & S_IALLUGO) == 0) | 475 | WARN_ON(1); /* use proc_mkdir() */ |
718 | mode |= S_IRUGO | S_IXUGO; | 476 | return NULL; |
719 | nlink = 2; | ||
720 | } else { | ||
721 | if ((mode & S_IFMT) == 0) | ||
722 | mode |= S_IFREG; | ||
723 | if ((mode & S_IALLUGO) == 0) | ||
724 | mode |= S_IRUGO; | ||
725 | nlink = 1; | ||
726 | } | 477 | } |
727 | 478 | ||
728 | pde = __proc_create(&parent, name, mode, nlink); | 479 | if ((mode & S_IALLUGO) == 0) |
480 | mode |= S_IRUGO; | ||
481 | pde = __proc_create(&parent, name, mode, 1); | ||
729 | if (!pde) | 482 | if (!pde) |
730 | goto out; | 483 | goto out; |
731 | pde->proc_fops = proc_fops; | 484 | pde->proc_fops = proc_fops; |
@@ -739,6 +492,19 @@ out: | |||
739 | return NULL; | 492 | return NULL; |
740 | } | 493 | } |
741 | EXPORT_SYMBOL(proc_create_data); | 494 | EXPORT_SYMBOL(proc_create_data); |
495 | |||
496 | void proc_set_size(struct proc_dir_entry *de, loff_t size) | ||
497 | { | ||
498 | de->size = size; | ||
499 | } | ||
500 | EXPORT_SYMBOL(proc_set_size); | ||
501 | |||
502 | void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) | ||
503 | { | ||
504 | de->uid = uid; | ||
505 | de->gid = gid; | ||
506 | } | ||
507 | EXPORT_SYMBOL(proc_set_user); | ||
742 | 508 | ||
743 | static void free_proc_entry(struct proc_dir_entry *de) | 509 | static void free_proc_entry(struct proc_dir_entry *de) |
744 | { | 510 | { |
@@ -755,41 +521,6 @@ void pde_put(struct proc_dir_entry *pde) | |||
755 | free_proc_entry(pde); | 521 | free_proc_entry(pde); |
756 | } | 522 | } |
757 | 523 | ||
758 | static void entry_rundown(struct proc_dir_entry *de) | ||
759 | { | ||
760 | spin_lock(&de->pde_unload_lock); | ||
761 | /* | ||
762 | * Stop accepting new callers into module. If you're | ||
763 | * dynamically allocating ->proc_fops, save a pointer somewhere. | ||
764 | */ | ||
765 | de->proc_fops = NULL; | ||
766 | /* Wait until all existing callers into module are done. */ | ||
767 | if (de->pde_users > 0) { | ||
768 | DECLARE_COMPLETION_ONSTACK(c); | ||
769 | |||
770 | if (!de->pde_unload_completion) | ||
771 | de->pde_unload_completion = &c; | ||
772 | |||
773 | spin_unlock(&de->pde_unload_lock); | ||
774 | |||
775 | wait_for_completion(de->pde_unload_completion); | ||
776 | |||
777 | spin_lock(&de->pde_unload_lock); | ||
778 | } | ||
779 | |||
780 | while (!list_empty(&de->pde_openers)) { | ||
781 | struct pde_opener *pdeo; | ||
782 | |||
783 | pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); | ||
784 | list_del(&pdeo->lh); | ||
785 | spin_unlock(&de->pde_unload_lock); | ||
786 | pdeo->release(pdeo->inode, pdeo->file); | ||
787 | kfree(pdeo); | ||
788 | spin_lock(&de->pde_unload_lock); | ||
789 | } | ||
790 | spin_unlock(&de->pde_unload_lock); | ||
791 | } | ||
792 | |||
793 | /* | 524 | /* |
794 | * Remove a /proc entry and free it if it's not currently in use. | 525 | * Remove a /proc entry and free it if it's not currently in use. |
795 | */ | 526 | */ |
@@ -821,7 +552,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent) | |||
821 | return; | 552 | return; |
822 | } | 553 | } |
823 | 554 | ||
824 | entry_rundown(de); | 555 | proc_entry_rundown(de); |
825 | 556 | ||
826 | if (S_ISDIR(de->mode)) | 557 | if (S_ISDIR(de->mode)) |
827 | parent->nlink--; | 558 | parent->nlink--; |
@@ -870,7 +601,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) | |||
870 | } | 601 | } |
871 | spin_unlock(&proc_subdir_lock); | 602 | spin_unlock(&proc_subdir_lock); |
872 | 603 | ||
873 | entry_rundown(de); | 604 | proc_entry_rundown(de); |
874 | next = de->parent; | 605 | next = de->parent; |
875 | if (S_ISDIR(de->mode)) | 606 | if (S_ISDIR(de->mode)) |
876 | next->nlink--; | 607 | next->nlink--; |
@@ -886,3 +617,23 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent) | |||
886 | return 0; | 617 | return 0; |
887 | } | 618 | } |
888 | EXPORT_SYMBOL(remove_proc_subtree); | 619 | EXPORT_SYMBOL(remove_proc_subtree); |
620 | |||
621 | void *proc_get_parent_data(const struct inode *inode) | ||
622 | { | ||
623 | struct proc_dir_entry *de = PDE(inode); | ||
624 | return de->parent->data; | ||
625 | } | ||
626 | EXPORT_SYMBOL_GPL(proc_get_parent_data); | ||
627 | |||
628 | void proc_remove(struct proc_dir_entry *de) | ||
629 | { | ||
630 | if (de) | ||
631 | remove_proc_subtree(de->name, de->parent); | ||
632 | } | ||
633 | EXPORT_SYMBOL(proc_remove); | ||
634 | |||
635 | void *PDE_DATA(const struct inode *inode) | ||
636 | { | ||
637 | return __PDE_DATA(inode); | ||
638 | } | ||
639 | EXPORT_SYMBOL(PDE_DATA); | ||
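With the legacy proc_file_read()/write_proc path removed above, generic.c now expects callers to hand proc_create_data() a complete file_operations and to use the new accessors. An illustrative sequence; foo_proc_fops and dev are made-up driver pieces, and foo_proc_fops's ->open would fetch dev back with PDE_DATA(inode):

struct proc_dir_entry *dir, *ent;

dir = proc_mkdir_data("foo", 0, NULL, dev);	/* mode 0 becomes S_IRUGO|S_IXUGO */
if (dir) {
	ent = proc_create_data("status", 0444, dir, &foo_proc_fops, dev);
	if (ent) {
		proc_set_size(ent, PAGE_SIZE);
		proc_set_user(ent, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID);
	}
}
/* teardown: proc_remove() drops the directory and everything under it */
proc_remove(dir);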
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 869116c2afbe..073aea60cf8f 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/seq_file.h> | 22 | #include <linux/seq_file.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/mount.h> | 24 | #include <linux/mount.h> |
25 | #include <linux/magic.h> | ||
25 | 26 | ||
26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
27 | 28 | ||
@@ -50,8 +51,8 @@ static void proc_evict_inode(struct inode *inode) | |||
50 | sysctl_head_put(head); | 51 | sysctl_head_put(head); |
51 | } | 52 | } |
52 | /* Release any associated namespace */ | 53 | /* Release any associated namespace */ |
53 | ns_ops = PROC_I(inode)->ns_ops; | 54 | ns_ops = PROC_I(inode)->ns.ns_ops; |
54 | ns = PROC_I(inode)->ns; | 55 | ns = PROC_I(inode)->ns.ns; |
55 | if (ns_ops && ns) | 56 | if (ns_ops && ns) |
56 | ns_ops->put(ns); | 57 | ns_ops->put(ns); |
57 | } | 58 | } |
@@ -72,8 +73,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) | |||
72 | ei->pde = NULL; | 73 | ei->pde = NULL; |
73 | ei->sysctl = NULL; | 74 | ei->sysctl = NULL; |
74 | ei->sysctl_entry = NULL; | 75 | ei->sysctl_entry = NULL; |
75 | ei->ns = NULL; | 76 | ei->ns.ns = NULL; |
76 | ei->ns_ops = NULL; | 77 | ei->ns.ns_ops = NULL; |
77 | inode = &ei->vfs_inode; | 78 | inode = &ei->vfs_inode; |
78 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 79 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
79 | return inode; | 80 | return inode; |
@@ -129,96 +130,100 @@ static const struct super_operations proc_sops = { | |||
129 | .show_options = proc_show_options, | 130 | .show_options = proc_show_options, |
130 | }; | 131 | }; |
131 | 132 | ||
132 | static void __pde_users_dec(struct proc_dir_entry *pde) | 133 | enum {BIAS = -1U<<31}; |
134 | |||
135 | static inline int use_pde(struct proc_dir_entry *pde) | ||
136 | { | ||
137 | return atomic_inc_unless_negative(&pde->in_use); | ||
138 | } | ||
139 | |||
140 | static void unuse_pde(struct proc_dir_entry *pde) | ||
133 | { | 141 | { |
134 | pde->pde_users--; | 142 | if (atomic_dec_return(&pde->in_use) == BIAS) |
135 | if (pde->pde_unload_completion && pde->pde_users == 0) | ||
136 | complete(pde->pde_unload_completion); | 143 | complete(pde->pde_unload_completion); |
137 | } | 144 | } |
138 | 145 | ||
139 | void pde_users_dec(struct proc_dir_entry *pde) | 146 | /* pde is locked */ |
147 | static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) | ||
140 | { | 148 | { |
141 | spin_lock(&pde->pde_unload_lock); | 149 | if (pdeo->closing) { |
142 | __pde_users_dec(pde); | 150 | /* somebody else is doing that, just wait */ |
143 | spin_unlock(&pde->pde_unload_lock); | 151 | DECLARE_COMPLETION_ONSTACK(c); |
152 | pdeo->c = &c; | ||
153 | spin_unlock(&pde->pde_unload_lock); | ||
154 | wait_for_completion(&c); | ||
155 | spin_lock(&pde->pde_unload_lock); | ||
156 | } else { | ||
157 | struct file *file; | ||
158 | pdeo->closing = 1; | ||
159 | spin_unlock(&pde->pde_unload_lock); | ||
160 | file = pdeo->file; | ||
161 | pde->proc_fops->release(file_inode(file), file); | ||
162 | spin_lock(&pde->pde_unload_lock); | ||
163 | list_del_init(&pdeo->lh); | ||
164 | if (pdeo->c) | ||
165 | complete(pdeo->c); | ||
166 | kfree(pdeo); | ||
167 | } | ||
168 | } | ||
169 | |||
170 | void proc_entry_rundown(struct proc_dir_entry *de) | ||
171 | { | ||
172 | DECLARE_COMPLETION_ONSTACK(c); | ||
173 | /* Wait until all existing callers into module are done. */ | ||
174 | de->pde_unload_completion = &c; | ||
175 | if (atomic_add_return(BIAS, &de->in_use) != BIAS) | ||
176 | wait_for_completion(&c); | ||
177 | |||
178 | spin_lock(&de->pde_unload_lock); | ||
179 | while (!list_empty(&de->pde_openers)) { | ||
180 | struct pde_opener *pdeo; | ||
181 | pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh); | ||
182 | close_pdeo(de, pdeo); | ||
183 | } | ||
184 | spin_unlock(&de->pde_unload_lock); | ||
144 | } | 185 | } |
145 | 186 | ||
146 | static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) | 187 | static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence) |
147 | { | 188 | { |
148 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 189 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
149 | loff_t rv = -EINVAL; | 190 | loff_t rv = -EINVAL; |
150 | loff_t (*llseek)(struct file *, loff_t, int); | 191 | if (use_pde(pde)) { |
151 | 192 | loff_t (*llseek)(struct file *, loff_t, int); | |
152 | spin_lock(&pde->pde_unload_lock); | 193 | llseek = pde->proc_fops->llseek; |
153 | /* | 194 | if (!llseek) |
154 | * remove_proc_entry() is going to delete PDE (as part of module | 195 | llseek = default_llseek; |
155 | * cleanup sequence). No new callers into module allowed. | 196 | rv = llseek(file, offset, whence); |
156 | */ | 197 | unuse_pde(pde); |
157 | if (!pde->proc_fops) { | ||
158 | spin_unlock(&pde->pde_unload_lock); | ||
159 | return rv; | ||
160 | } | 198 | } |
161 | /* | ||
162 | * Bump refcount so that remove_proc_entry will wail for ->llseek to | ||
163 | * complete. | ||
164 | */ | ||
165 | pde->pde_users++; | ||
166 | /* | ||
167 | * Save function pointer under lock, to protect against ->proc_fops | ||
168 | * NULL'ifying right after ->pde_unload_lock is dropped. | ||
169 | */ | ||
170 | llseek = pde->proc_fops->llseek; | ||
171 | spin_unlock(&pde->pde_unload_lock); | ||
172 | |||
173 | if (!llseek) | ||
174 | llseek = default_llseek; | ||
175 | rv = llseek(file, offset, whence); | ||
176 | |||
177 | pde_users_dec(pde); | ||
178 | return rv; | 199 | return rv; |
179 | } | 200 | } |
180 | 201 | ||
181 | static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | 202 | static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) |
182 | { | 203 | { |
204 | ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); | ||
183 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 205 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
184 | ssize_t rv = -EIO; | 206 | ssize_t rv = -EIO; |
185 | ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); | 207 | if (use_pde(pde)) { |
186 | 208 | read = pde->proc_fops->read; | |
187 | spin_lock(&pde->pde_unload_lock); | 209 | if (read) |
188 | if (!pde->proc_fops) { | 210 | rv = read(file, buf, count, ppos); |
189 | spin_unlock(&pde->pde_unload_lock); | 211 | unuse_pde(pde); |
190 | return rv; | ||
191 | } | 212 | } |
192 | pde->pde_users++; | ||
193 | read = pde->proc_fops->read; | ||
194 | spin_unlock(&pde->pde_unload_lock); | ||
195 | |||
196 | if (read) | ||
197 | rv = read(file, buf, count, ppos); | ||
198 | |||
199 | pde_users_dec(pde); | ||
200 | return rv; | 213 | return rv; |
201 | } | 214 | } |
202 | 215 | ||
203 | static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) | 216 | static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) |
204 | { | 217 | { |
218 | ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); | ||
205 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 219 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
206 | ssize_t rv = -EIO; | 220 | ssize_t rv = -EIO; |
207 | ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); | 221 | if (use_pde(pde)) { |
208 | 222 | write = pde->proc_fops->write; | |
209 | spin_lock(&pde->pde_unload_lock); | 223 | if (write) |
210 | if (!pde->proc_fops) { | 224 | rv = write(file, buf, count, ppos); |
211 | spin_unlock(&pde->pde_unload_lock); | 225 | unuse_pde(pde); |
212 | return rv; | ||
213 | } | 226 | } |
214 | pde->pde_users++; | ||
215 | write = pde->proc_fops->write; | ||
216 | spin_unlock(&pde->pde_unload_lock); | ||
217 | |||
218 | if (write) | ||
219 | rv = write(file, buf, count, ppos); | ||
220 | |||
221 | pde_users_dec(pde); | ||
222 | return rv; | 227 | return rv; |
223 | } | 228 | } |
224 | 229 | ||
@@ -227,20 +232,12 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p | |||
227 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 232 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
228 | unsigned int rv = DEFAULT_POLLMASK; | 233 | unsigned int rv = DEFAULT_POLLMASK; |
229 | unsigned int (*poll)(struct file *, struct poll_table_struct *); | 234 | unsigned int (*poll)(struct file *, struct poll_table_struct *); |
230 | 235 | if (use_pde(pde)) { | |
231 | spin_lock(&pde->pde_unload_lock); | 236 | poll = pde->proc_fops->poll; |
232 | if (!pde->proc_fops) { | 237 | if (poll) |
233 | spin_unlock(&pde->pde_unload_lock); | 238 | rv = poll(file, pts); |
234 | return rv; | 239 | unuse_pde(pde); |
235 | } | 240 | } |
236 | pde->pde_users++; | ||
237 | poll = pde->proc_fops->poll; | ||
238 | spin_unlock(&pde->pde_unload_lock); | ||
239 | |||
240 | if (poll) | ||
241 | rv = poll(file, pts); | ||
242 | |||
243 | pde_users_dec(pde); | ||
244 | return rv; | 241 | return rv; |
245 | } | 242 | } |
246 | 243 | ||
@@ -249,20 +246,12 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne | |||
249 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 246 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
250 | long rv = -ENOTTY; | 247 | long rv = -ENOTTY; |
251 | long (*ioctl)(struct file *, unsigned int, unsigned long); | 248 | long (*ioctl)(struct file *, unsigned int, unsigned long); |
252 | 249 | if (use_pde(pde)) { | |
253 | spin_lock(&pde->pde_unload_lock); | 250 | ioctl = pde->proc_fops->unlocked_ioctl; |
254 | if (!pde->proc_fops) { | 251 | if (ioctl) |
255 | spin_unlock(&pde->pde_unload_lock); | 252 | rv = ioctl(file, cmd, arg); |
256 | return rv; | 253 | unuse_pde(pde); |
257 | } | 254 | } |
258 | pde->pde_users++; | ||
259 | ioctl = pde->proc_fops->unlocked_ioctl; | ||
260 | spin_unlock(&pde->pde_unload_lock); | ||
261 | |||
262 | if (ioctl) | ||
263 | rv = ioctl(file, cmd, arg); | ||
264 | |||
265 | pde_users_dec(pde); | ||
266 | return rv; | 255 | return rv; |
267 | } | 256 | } |
268 | 257 | ||
@@ -272,20 +261,12 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned | |||
272 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 261 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
273 | long rv = -ENOTTY; | 262 | long rv = -ENOTTY; |
274 | long (*compat_ioctl)(struct file *, unsigned int, unsigned long); | 263 | long (*compat_ioctl)(struct file *, unsigned int, unsigned long); |
275 | 264 | if (use_pde(pde)) { | |
276 | spin_lock(&pde->pde_unload_lock); | 265 | compat_ioctl = pde->proc_fops->compat_ioctl; |
277 | if (!pde->proc_fops) { | 266 | if (compat_ioctl) |
278 | spin_unlock(&pde->pde_unload_lock); | 267 | rv = compat_ioctl(file, cmd, arg); |
279 | return rv; | 268 | unuse_pde(pde); |
280 | } | 269 | } |
281 | pde->pde_users++; | ||
282 | compat_ioctl = pde->proc_fops->compat_ioctl; | ||
283 | spin_unlock(&pde->pde_unload_lock); | ||
284 | |||
285 | if (compat_ioctl) | ||
286 | rv = compat_ioctl(file, cmd, arg); | ||
287 | |||
288 | pde_users_dec(pde); | ||
289 | return rv; | 270 | return rv; |
290 | } | 271 | } |
291 | #endif | 272 | #endif |
@@ -295,20 +276,12 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma) | |||
295 | struct proc_dir_entry *pde = PDE(file_inode(file)); | 276 | struct proc_dir_entry *pde = PDE(file_inode(file)); |
296 | int rv = -EIO; | 277 | int rv = -EIO; |
297 | int (*mmap)(struct file *, struct vm_area_struct *); | 278 | int (*mmap)(struct file *, struct vm_area_struct *); |
298 | 279 | if (use_pde(pde)) { | |
299 | spin_lock(&pde->pde_unload_lock); | 280 | mmap = pde->proc_fops->mmap; |
300 | if (!pde->proc_fops) { | 281 | if (mmap) |
301 | spin_unlock(&pde->pde_unload_lock); | 282 | rv = mmap(file, vma); |
302 | return rv; | 283 | unuse_pde(pde); |
303 | } | 284 | } |
304 | pde->pde_users++; | ||
305 | mmap = pde->proc_fops->mmap; | ||
306 | spin_unlock(&pde->pde_unload_lock); | ||
307 | |||
308 | if (mmap) | ||
309 | rv = mmap(file, vma); | ||
310 | |||
311 | pde_users_dec(pde); | ||
312 | return rv; | 285 | return rv; |
313 | } | 286 | } |
314 | 287 | ||
@@ -330,91 +303,47 @@ static int proc_reg_open(struct inode *inode, struct file *file) | |||
330 | * by hand in remove_proc_entry(). For this, save opener's credentials | 303 | * by hand in remove_proc_entry(). For this, save opener's credentials |
331 | * for later. | 304 | * for later. |
332 | */ | 305 | */ |
333 | pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL); | 306 | pdeo = kzalloc(sizeof(struct pde_opener), GFP_KERNEL); |
334 | if (!pdeo) | 307 | if (!pdeo) |
335 | return -ENOMEM; | 308 | return -ENOMEM; |
336 | 309 | ||
337 | spin_lock(&pde->pde_unload_lock); | 310 | if (!use_pde(pde)) { |
338 | if (!pde->proc_fops) { | ||
339 | spin_unlock(&pde->pde_unload_lock); | ||
340 | kfree(pdeo); | 311 | kfree(pdeo); |
341 | return -ENOENT; | 312 | return -ENOENT; |
342 | } | 313 | } |
343 | pde->pde_users++; | ||
344 | open = pde->proc_fops->open; | 314 | open = pde->proc_fops->open; |
345 | release = pde->proc_fops->release; | 315 | release = pde->proc_fops->release; |
346 | spin_unlock(&pde->pde_unload_lock); | ||
347 | 316 | ||
348 | if (open) | 317 | if (open) |
349 | rv = open(inode, file); | 318 | rv = open(inode, file); |
350 | 319 | ||
351 | spin_lock(&pde->pde_unload_lock); | ||
352 | if (rv == 0 && release) { | 320 | if (rv == 0 && release) { |
353 | /* To know what to release. */ | 321 | /* To know what to release. */ |
354 | pdeo->inode = inode; | ||
355 | pdeo->file = file; | 322 | pdeo->file = file; |
356 | /* Strictly for "too late" ->release in proc_reg_release(). */ | 323 | /* Strictly for "too late" ->release in proc_reg_release(). */ |
357 | pdeo->release = release; | 324 | spin_lock(&pde->pde_unload_lock); |
358 | list_add(&pdeo->lh, &pde->pde_openers); | 325 | list_add(&pdeo->lh, &pde->pde_openers); |
326 | spin_unlock(&pde->pde_unload_lock); | ||
359 | } else | 327 | } else |
360 | kfree(pdeo); | 328 | kfree(pdeo); |
361 | __pde_users_dec(pde); | ||
362 | spin_unlock(&pde->pde_unload_lock); | ||
363 | return rv; | ||
364 | } | ||
365 | |||
366 | static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde, | ||
367 | struct inode *inode, struct file *file) | ||
368 | { | ||
369 | struct pde_opener *pdeo; | ||
370 | 329 | ||
371 | list_for_each_entry(pdeo, &pde->pde_openers, lh) { | 330 | unuse_pde(pde); |
372 | if (pdeo->inode == inode && pdeo->file == file) | 331 | return rv; |
373 | return pdeo; | ||
374 | } | ||
375 | return NULL; | ||
376 | } | 332 | } |
377 | 333 | ||
378 | static int proc_reg_release(struct inode *inode, struct file *file) | 334 | static int proc_reg_release(struct inode *inode, struct file *file) |
379 | { | 335 | { |
380 | struct proc_dir_entry *pde = PDE(inode); | 336 | struct proc_dir_entry *pde = PDE(inode); |
381 | int rv = 0; | ||
382 | int (*release)(struct inode *, struct file *); | ||
383 | struct pde_opener *pdeo; | 337 | struct pde_opener *pdeo; |
384 | |||
385 | spin_lock(&pde->pde_unload_lock); | 338 | spin_lock(&pde->pde_unload_lock); |
386 | pdeo = find_pde_opener(pde, inode, file); | 339 | list_for_each_entry(pdeo, &pde->pde_openers, lh) { |
387 | if (!pde->proc_fops) { | 340 | if (pdeo->file == file) { |
388 | /* | 341 | close_pdeo(pde, pdeo); |
389 | * Can't simply exit, __fput() will think that everything is OK, | 342 | break; |
390 | * and move on to freeing struct file. remove_proc_entry() will | 343 | } |
391 | * find slacker in opener's list and will try to do non-trivial | ||
392 | * things with struct file. Therefore, remove opener from list. | ||
393 | * | ||
394 | * But if opener is removed from list, who will ->release it? | ||
395 | */ | ||
396 | if (pdeo) { | ||
397 | list_del(&pdeo->lh); | ||
398 | spin_unlock(&pde->pde_unload_lock); | ||
399 | rv = pdeo->release(inode, file); | ||
400 | kfree(pdeo); | ||
401 | } else | ||
402 | spin_unlock(&pde->pde_unload_lock); | ||
403 | return rv; | ||
404 | } | ||
405 | pde->pde_users++; | ||
406 | release = pde->proc_fops->release; | ||
407 | if (pdeo) { | ||
408 | list_del(&pdeo->lh); | ||
409 | kfree(pdeo); | ||
410 | } | 344 | } |
411 | spin_unlock(&pde->pde_unload_lock); | 345 | spin_unlock(&pde->pde_unload_lock); |
412 | 346 | return 0; | |
413 | if (release) | ||
414 | rv = release(inode, file); | ||
415 | |||
416 | pde_users_dec(pde); | ||
417 | return rv; | ||
418 | } | 347 | } |
419 | 348 | ||
420 | static const struct file_operations proc_reg_file_ops = { | 349 | static const struct file_operations proc_reg_file_ops = { |
@@ -462,8 +391,8 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) | |||
462 | inode->i_size = de->size; | 391 | inode->i_size = de->size; |
463 | if (de->nlink) | 392 | if (de->nlink) |
464 | set_nlink(inode, de->nlink); | 393 | set_nlink(inode, de->nlink); |
465 | if (de->proc_iops) | 394 | WARN_ON(!de->proc_iops); |
466 | inode->i_op = de->proc_iops; | 395 | inode->i_op = de->proc_iops; |
467 | if (de->proc_fops) { | 396 | if (de->proc_fops) { |
468 | if (S_ISREG(inode->i_mode)) { | 397 | if (S_ISREG(inode->i_mode)) { |
469 | #ifdef CONFIG_COMPAT | 398 | #ifdef CONFIG_COMPAT |
@@ -506,5 +435,5 @@ int proc_fill_super(struct super_block *s) | |||
506 | return -ENOMEM; | 435 | return -ENOMEM; |
507 | } | 436 | } |
508 | 437 | ||
509 | return 0; | 438 | return proc_setup_self(s); |
510 | } | 439 | } |
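The in_use counter introduced above replaces the pde_users/spinlock scheme with a biased atomic: normal callers increment it through use_pde(), and proc_entry_rundown() adds BIAS (INT_MIN) so atomic_inc_unless_negative() refuses new entries while existing callers drain. For example, with two callers inside the counter is 2; rundown makes it BIAS+2, and the unuse_pde() that brings it back to exactly BIAS fires the completion. The same idea in isolation, as a sketch rather than the patch's code:

enum { BIAS = -1U << 31 };		/* INT_MIN: forces the count negative */

static atomic_t in_use = ATOMIC_INIT(0);

static int enter(void)				/* cf. use_pde() */
{
	return atomic_inc_unless_negative(&in_use);
}

static void leave(struct completion *done)	/* cf. unuse_pde() */
{
	if (atomic_dec_return(&in_use) == BIAS)
		complete(done);			/* last caller after rundown began */
}

static void rundown(struct completion *done)	/* cf. proc_entry_rundown() */
{
	if (atomic_add_return(BIAS, &in_use) != BIAS)
		wait_for_completion(done);	/* wait for in-flight callers */
}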
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 75710357a517..d600fb098b6a 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -1,4 +1,4 @@ | |||
1 | /* internal.h: internal procfs definitions | 1 | /* Internal procfs definitions |
2 | * | 2 | * |
3 | * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. | 3 | * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. |
4 | * Written by David Howells (dhowells@redhat.com) | 4 | * Written by David Howells (dhowells@redhat.com) |
@@ -9,62 +9,83 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/proc_fs.h> | 12 | #include <linux/proc_fs.h> |
13 | #include <linux/proc_ns.h> | ||
14 | #include <linux/spinlock.h> | ||
15 | #include <linux/atomic.h> | ||
14 | #include <linux/binfmts.h> | 16 | #include <linux/binfmts.h> |
15 | struct ctl_table_header; | ||
16 | struct mempolicy; | ||
17 | 17 | ||
18 | extern struct proc_dir_entry proc_root; | 18 | struct ctl_table_header; |
19 | extern void proc_self_init(void); | 19 | struct mempolicy; |
20 | #ifdef CONFIG_PROC_SYSCTL | ||
21 | extern int proc_sys_init(void); | ||
22 | extern void sysctl_head_put(struct ctl_table_header *head); | ||
23 | #else | ||
24 | static inline void proc_sys_init(void) { } | ||
25 | static inline void sysctl_head_put(struct ctl_table_header *head) { } | ||
26 | #endif | ||
27 | #ifdef CONFIG_NET | ||
28 | extern int proc_net_init(void); | ||
29 | #else | ||
30 | static inline int proc_net_init(void) { return 0; } | ||
31 | #endif | ||
32 | 20 | ||
33 | extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, | 21 | /* |
34 | struct pid *pid, struct task_struct *task); | 22 | * This is not completely implemented yet. The idea is to |
35 | extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, | 23 | * create an in-memory tree (like the actual /proc filesystem |
36 | struct pid *pid, struct task_struct *task); | 24 | * tree) of these proc_dir_entries, so that we can dynamically |
37 | extern int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | 25 | * add new files to /proc. |
38 | struct pid *pid, struct task_struct *task); | 26 | * |
39 | extern int proc_pid_statm(struct seq_file *m, struct pid_namespace *ns, | 27 | * The "next" pointer creates a linked list of one /proc directory, |
40 | struct pid *pid, struct task_struct *task); | 28 | * while parent/subdir create the directory structure (every |
41 | extern loff_t mem_lseek(struct file *file, loff_t offset, int orig); | 29 | * /proc file has a parent, but "subdir" is NULL for all |
30 | * non-directory entries). | ||
31 | */ | ||
32 | struct proc_dir_entry { | ||
33 | unsigned int low_ino; | ||
34 | umode_t mode; | ||
35 | nlink_t nlink; | ||
36 | kuid_t uid; | ||
37 | kgid_t gid; | ||
38 | loff_t size; | ||
39 | const struct inode_operations *proc_iops; | ||
40 | const struct file_operations *proc_fops; | ||
41 | struct proc_dir_entry *next, *parent, *subdir; | ||
42 | void *data; | ||
43 | atomic_t count; /* use count */ | ||
44 | atomic_t in_use; /* number of callers into module in progress; */ | ||
45 | /* negative -> it's going away RSN */ | ||
46 | struct completion *pde_unload_completion; | ||
47 | struct list_head pde_openers; /* who did ->open, but not ->release */ | ||
48 | spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ | ||
49 | u8 namelen; | ||
50 | char name[]; | ||
51 | }; | ||
42 | 52 | ||
43 | extern const struct file_operations proc_tid_children_operations; | 53 | union proc_op { |
44 | extern const struct file_operations proc_pid_maps_operations; | 54 | int (*proc_get_link)(struct dentry *, struct path *); |
45 | extern const struct file_operations proc_tid_maps_operations; | 55 | int (*proc_read)(struct task_struct *task, char *page); |
46 | extern const struct file_operations proc_pid_numa_maps_operations; | 56 | int (*proc_show)(struct seq_file *m, |
47 | extern const struct file_operations proc_tid_numa_maps_operations; | 57 | struct pid_namespace *ns, struct pid *pid, |
48 | extern const struct file_operations proc_pid_smaps_operations; | 58 | struct task_struct *task); |
49 | extern const struct file_operations proc_tid_smaps_operations; | 59 | }; |
50 | extern const struct file_operations proc_clear_refs_operations; | ||
51 | extern const struct file_operations proc_pagemap_operations; | ||
52 | extern const struct file_operations proc_net_operations; | ||
53 | extern const struct inode_operations proc_net_inode_operations; | ||
54 | extern const struct inode_operations proc_pid_link_inode_operations; | ||
55 | 60 | ||
56 | struct proc_maps_private { | 61 | struct proc_inode { |
57 | struct pid *pid; | 62 | struct pid *pid; |
58 | struct task_struct *task; | 63 | int fd; |
59 | #ifdef CONFIG_MMU | 64 | union proc_op op; |
60 | struct vm_area_struct *tail_vma; | 65 | struct proc_dir_entry *pde; |
61 | #endif | 66 | struct ctl_table_header *sysctl; |
62 | #ifdef CONFIG_NUMA | 67 | struct ctl_table *sysctl_entry; |
63 | struct mempolicy *task_mempolicy; | 68 | struct proc_ns ns; |
64 | #endif | 69 | struct inode vfs_inode; |
65 | }; | 70 | }; |
66 | 71 | ||
67 | void proc_init_inodecache(void); | 72 | /* |
73 | * General functions | ||
74 | */ | ||
75 | static inline struct proc_inode *PROC_I(const struct inode *inode) | ||
76 | { | ||
77 | return container_of(inode, struct proc_inode, vfs_inode); | ||
78 | } | ||
79 | |||
80 | static inline struct proc_dir_entry *PDE(const struct inode *inode) | ||
81 | { | ||
82 | return PROC_I(inode)->pde; | ||
83 | } | ||
84 | |||
85 | static inline void *__PDE_DATA(const struct inode *inode) | ||
86 | { | ||
87 | return PDE(inode)->data; | ||
88 | } | ||
68 | 89 | ||
69 | static inline struct pid *proc_pid(struct inode *inode) | 90 | static inline struct pid *proc_pid(struct inode *inode) |
70 | { | 91 | { |
@@ -76,11 +97,6 @@ static inline struct task_struct *get_proc_task(struct inode *inode) | |||
76 | return get_pid_task(proc_pid(inode), PIDTYPE_PID); | 97 | return get_pid_task(proc_pid(inode), PIDTYPE_PID); |
77 | } | 98 | } |
78 | 99 | ||
79 | static inline int proc_fd(struct inode *inode) | ||
80 | { | ||
81 | return PROC_I(inode)->fd; | ||
82 | } | ||
83 | |||
84 | static inline int task_dumpable(struct task_struct *task) | 100 | static inline int task_dumpable(struct task_struct *task) |
85 | { | 101 | { |
86 | int dumpable = 0; | 102 | int dumpable = 0; |
@@ -96,15 +112,6 @@ static inline int task_dumpable(struct task_struct *task) | |||
96 | return 0; | 112 | return 0; |
97 | } | 113 | } |
98 | 114 | ||
99 | static inline int pid_delete_dentry(const struct dentry * dentry) | ||
100 | { | ||
101 | /* Is the task we represent dead? | ||
102 | * If so, then don't put the dentry on the lru list, | ||
103 | * kill it immediately. | ||
104 | */ | ||
105 | return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first; | ||
106 | } | ||
107 | |||
108 | static inline unsigned name_to_int(struct dentry *dentry) | 115 | static inline unsigned name_to_int(struct dentry *dentry) |
109 | { | 116 | { |
110 | const char *name = dentry->d_name.name; | 117 | const char *name = dentry->d_name.name; |
@@ -127,63 +134,165 @@ out: | |||
127 | return ~0U; | 134 | return ~0U; |
128 | } | 135 | } |
129 | 136 | ||
130 | struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino, | 137 | /* |
131 | struct dentry *dentry); | 138 | * Offset of the first process in the /proc root directory.. |
132 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, | 139 | */ |
133 | filldir_t filldir); | 140 | #define FIRST_PROCESS_ENTRY 256 |
141 | |||
142 | /* Worst case buffer size needed for holding an integer. */ | ||
143 | #define PROC_NUMBUF 13 | ||
134 | 144 | ||
135 | struct pde_opener { | 145 | /* |
136 | struct inode *inode; | 146 | * array.c |
137 | struct file *file; | 147 | */ |
138 | int (*release)(struct inode *, struct file *); | 148 | extern const struct file_operations proc_tid_children_operations; |
139 | struct list_head lh; | ||
140 | }; | ||
141 | void pde_users_dec(struct proc_dir_entry *pde); | ||
142 | 149 | ||
150 | extern int proc_tid_stat(struct seq_file *, struct pid_namespace *, | ||
151 | struct pid *, struct task_struct *); | ||
152 | extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *, | ||
153 | struct pid *, struct task_struct *); | ||
154 | extern int proc_pid_status(struct seq_file *, struct pid_namespace *, | ||
155 | struct pid *, struct task_struct *); | ||
156 | extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, | ||
157 | struct pid *, struct task_struct *); | ||
158 | |||
159 | /* | ||
160 | * base.c | ||
161 | */ | ||
162 | extern const struct dentry_operations pid_dentry_operations; | ||
163 | extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *); | ||
164 | extern int proc_setattr(struct dentry *, struct iattr *); | ||
165 | extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *); | ||
166 | extern int pid_revalidate(struct dentry *, unsigned int); | ||
167 | extern int pid_delete_dentry(const struct dentry *); | ||
168 | extern int proc_pid_readdir(struct file *, void *, filldir_t); | ||
169 | extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int); | ||
170 | extern loff_t mem_lseek(struct file *, loff_t, int); | ||
171 | |||
172 | /* Lookups */ | ||
173 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | ||
174 | struct task_struct *, const void *); | ||
175 | extern int proc_fill_cache(struct file *, void *, filldir_t, const char *, int, | ||
176 | instantiate_t, struct task_struct *, const void *); | ||
177 | |||
178 | /* | ||
179 | * generic.c | ||
180 | */ | ||
143 | extern spinlock_t proc_subdir_lock; | 181 | extern spinlock_t proc_subdir_lock; |
144 | 182 | ||
145 | struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int); | 183 | extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); |
146 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); | 184 | extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, |
147 | unsigned long task_vsize(struct mm_struct *); | 185 | struct dentry *); |
148 | unsigned long task_statm(struct mm_struct *, | 186 | extern int proc_readdir(struct file *, void *, filldir_t); |
149 | unsigned long *, unsigned long *, unsigned long *, unsigned long *); | 187 | extern int proc_readdir_de(struct proc_dir_entry *, struct file *, void *, filldir_t); |
150 | void task_mem(struct seq_file *, struct mm_struct *); | ||
151 | 188 | ||
152 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) | 189 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) |
153 | { | 190 | { |
154 | atomic_inc(&pde->count); | 191 | atomic_inc(&pde->count); |
155 | return pde; | 192 | return pde; |
156 | } | 193 | } |
157 | void pde_put(struct proc_dir_entry *pde); | 194 | extern void pde_put(struct proc_dir_entry *); |
158 | |||
159 | int proc_fill_super(struct super_block *); | ||
160 | struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | ||
161 | int proc_remount(struct super_block *sb, int *flags, char *data); | ||
162 | 195 | ||
163 | /* | 196 | /* |
164 | * These are generic /proc routines that use the internal | 197 | * inode.c |
165 | * "struct proc_dir_entry" tree to traverse the filesystem. | ||
166 | * | ||
167 | * The /proc root directory has extended versions to take care | ||
168 | * of the /proc/<pid> subdirectories. | ||
169 | */ | 198 | */ |
170 | int proc_readdir(struct file *, void *, filldir_t); | 199 | struct pde_opener { |
171 | struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); | 200 | struct file *file; |
201 | struct list_head lh; | ||
202 | int closing; | ||
203 | struct completion *c; | ||
204 | }; | ||
172 | 205 | ||
206 | extern const struct inode_operations proc_pid_link_inode_operations; | ||
173 | 207 | ||
208 | extern void proc_init_inodecache(void); | ||
209 | extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); | ||
210 | extern int proc_fill_super(struct super_block *); | ||
211 | extern void proc_entry_rundown(struct proc_dir_entry *); | ||
174 | 212 | ||
175 | /* Lookups */ | 213 | /* |
176 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | 214 | * proc_devtree.c |
177 | struct task_struct *, const void *); | 215 | */ |
178 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 216 | #ifdef CONFIG_PROC_DEVICETREE |
179 | const char *name, int len, | 217 | extern void proc_device_tree_init(void); |
180 | instantiate_t instantiate, struct task_struct *task, const void *ptr); | 218 | #endif |
181 | int pid_revalidate(struct dentry *dentry, unsigned int flags); | ||
182 | struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); | ||
183 | extern const struct dentry_operations pid_dentry_operations; | ||
184 | int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); | ||
185 | int proc_setattr(struct dentry *dentry, struct iattr *attr); | ||
186 | 219 | ||
220 | /* | ||
221 | * proc_namespaces.c | ||
222 | */ | ||
187 | extern const struct inode_operations proc_ns_dir_inode_operations; | 223 | extern const struct inode_operations proc_ns_dir_inode_operations; |
188 | extern const struct file_operations proc_ns_dir_operations; | 224 | extern const struct file_operations proc_ns_dir_operations; |
189 | 225 | ||
226 | /* | ||
227 | * proc_net.c | ||
228 | */ | ||
229 | extern const struct file_operations proc_net_operations; | ||
230 | extern const struct inode_operations proc_net_inode_operations; | ||
231 | |||
232 | #ifdef CONFIG_NET | ||
233 | extern int proc_net_init(void); | ||
234 | #else | ||
235 | static inline int proc_net_init(void) { return 0; } | ||
236 | #endif | ||
237 | |||
238 | /* | ||
239 | * proc_self.c | ||
240 | */ | ||
241 | extern int proc_setup_self(struct super_block *); | ||
242 | |||
243 | /* | ||
244 | * proc_sysctl.c | ||
245 | */ | ||
246 | #ifdef CONFIG_PROC_SYSCTL | ||
247 | extern int proc_sys_init(void); | ||
248 | extern void sysctl_head_put(struct ctl_table_header *); | ||
249 | #else | ||
250 | static inline void proc_sys_init(void) { } | ||
251 | static inline void sysctl_head_put(struct ctl_table_header *head) { } | ||
252 | #endif | ||
253 | |||
254 | /* | ||
255 | * proc_tty.c | ||
256 | */ | ||
257 | #ifdef CONFIG_TTY | ||
258 | extern void proc_tty_init(void); | ||
259 | #else | ||
260 | static inline void proc_tty_init(void) {} | ||
261 | #endif | ||
262 | |||
263 | /* | ||
264 | * root.c | ||
265 | */ | ||
266 | extern struct proc_dir_entry proc_root; | ||
267 | |||
268 | extern void proc_self_init(void); | ||
269 | extern int proc_remount(struct super_block *, int *, char *); | ||
270 | |||
271 | /* | ||
272 | * task_[no]mmu.c | ||
273 | */ | ||
274 | struct proc_maps_private { | ||
275 | struct pid *pid; | ||
276 | struct task_struct *task; | ||
277 | #ifdef CONFIG_MMU | ||
278 | struct vm_area_struct *tail_vma; | ||
279 | #endif | ||
280 | #ifdef CONFIG_NUMA | ||
281 | struct mempolicy *task_mempolicy; | ||
282 | #endif | ||
283 | }; | ||
284 | |||
285 | extern const struct file_operations proc_pid_maps_operations; | ||
286 | extern const struct file_operations proc_tid_maps_operations; | ||
287 | extern const struct file_operations proc_pid_numa_maps_operations; | ||
288 | extern const struct file_operations proc_tid_numa_maps_operations; | ||
289 | extern const struct file_operations proc_pid_smaps_operations; | ||
290 | extern const struct file_operations proc_tid_smaps_operations; | ||
291 | extern const struct file_operations proc_clear_refs_operations; | ||
292 | extern const struct file_operations proc_pagemap_operations; | ||
293 | |||
294 | extern unsigned long task_vsize(struct mm_struct *); | ||
295 | extern unsigned long task_statm(struct mm_struct *, | ||
296 | unsigned long *, unsigned long *, | ||
297 | unsigned long *, unsigned long *); | ||
298 | extern void task_mem(struct seq_file *, struct mm_struct *); | ||
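struct proc_dir_entry and the PROC_I()/PDE() helpers now live in this internal header, so code outside fs/proc can no longer dereference pde->data directly; the exported PDE_DATA() accessor (presumably a thin wrapper around the __PDE_DATA() inline above) is the replacement. The before/after for a typical ->open, as seen in the proc_devtree and reiserfs conversions later in this diff (my_show is a placeholder callback):

	/* before: needs the struct proc_dir_entry layout in scope */
	return single_open(file, my_show, PDE(inode)->data);

	/* after: opaque accessor, layout stays private to fs/proc */
	return single_open(file, my_show, PDE_DATA(inode));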
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index f6a13f489e30..0a22194e5d58 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/proc_fs.h> | 13 | #include <linux/proc_fs.h> |
14 | #include <linux/kcore.h> | ||
14 | #include <linux/user.h> | 15 | #include <linux/user.h> |
15 | #include <linux/capability.h> | 16 | #include <linux/capability.h> |
16 | #include <linux/elf.h> | 17 | #include <linux/elf.h> |
@@ -28,6 +29,7 @@ | |||
28 | #include <linux/ioport.h> | 29 | #include <linux/ioport.h> |
29 | #include <linux/memory.h> | 30 | #include <linux/memory.h> |
30 | #include <asm/sections.h> | 31 | #include <asm/sections.h> |
32 | #include "internal.h" | ||
31 | 33 | ||
32 | #define CORE_STR "CORE" | 34 | #define CORE_STR "CORE" |
33 | 35 | ||
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 66b51c0383da..54bdc6701e9f 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
@@ -51,7 +51,7 @@ static int ns_delete_dentry(const struct dentry *dentry) | |||
51 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | 51 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) |
52 | { | 52 | { |
53 | struct inode *inode = dentry->d_inode; | 53 | struct inode *inode = dentry->d_inode; |
54 | const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; | 54 | const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns.ns_ops; |
55 | 55 | ||
56 | return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", | 56 | return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", |
57 | ns_ops->name, inode->i_ino); | 57 | ns_ops->name, inode->i_ino); |
@@ -95,8 +95,8 @@ static struct dentry *proc_ns_get_dentry(struct super_block *sb, | |||
95 | inode->i_op = &ns_inode_operations; | 95 | inode->i_op = &ns_inode_operations; |
96 | inode->i_mode = S_IFREG | S_IRUGO; | 96 | inode->i_mode = S_IFREG | S_IRUGO; |
97 | inode->i_fop = &ns_file_operations; | 97 | inode->i_fop = &ns_file_operations; |
98 | ei->ns_ops = ns_ops; | 98 | ei->ns.ns_ops = ns_ops; |
99 | ei->ns = ns; | 99 | ei->ns.ns = ns; |
100 | unlock_new_inode(inode); | 100 | unlock_new_inode(inode); |
101 | } else { | 101 | } else { |
102 | ns_ops->put(ns); | 102 | ns_ops->put(ns); |
@@ -128,7 +128,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
128 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 128 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
129 | goto out_put_task; | 129 | goto out_put_task; |
130 | 130 | ||
131 | ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); | 131 | ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops); |
132 | if (IS_ERR(ns_path.dentry)) { | 132 | if (IS_ERR(ns_path.dentry)) { |
133 | error = ERR_CAST(ns_path.dentry); | 133 | error = ERR_CAST(ns_path.dentry); |
134 | goto out_put_task; | 134 | goto out_put_task; |
@@ -148,7 +148,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl | |||
148 | { | 148 | { |
149 | struct inode *inode = dentry->d_inode; | 149 | struct inode *inode = dentry->d_inode; |
150 | struct proc_inode *ei = PROC_I(inode); | 150 | struct proc_inode *ei = PROC_I(inode); |
151 | const struct proc_ns_operations *ns_ops = ei->ns_ops; | 151 | const struct proc_ns_operations *ns_ops = ei->ns.ns_ops; |
152 | struct task_struct *task; | 152 | struct task_struct *task; |
153 | void *ns; | 153 | void *ns; |
154 | char name[50]; | 154 | char name[50]; |
@@ -202,7 +202,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, | |||
202 | ei = PROC_I(inode); | 202 | ei = PROC_I(inode); |
203 | inode->i_mode = S_IFLNK|S_IRWXUGO; | 203 | inode->i_mode = S_IFLNK|S_IRWXUGO; |
204 | inode->i_op = &proc_ns_link_inode_operations; | 204 | inode->i_op = &proc_ns_link_inode_operations; |
205 | ei->ns_ops = ns_ops; | 205 | ei->ns.ns_ops = ns_ops; |
206 | 206 | ||
207 | d_set_d_op(dentry, &pid_dentry_operations); | 207 | d_set_d_op(dentry, &pid_dentry_operations); |
208 | d_add(dentry, inode); | 208 | d_add(dentry, inode); |
@@ -337,6 +337,11 @@ out_invalid: | |||
337 | return ERR_PTR(-EINVAL); | 337 | return ERR_PTR(-EINVAL); |
338 | } | 338 | } |
339 | 339 | ||
340 | struct proc_ns *get_proc_ns(struct inode *inode) | ||
341 | { | ||
342 | return &PROC_I(inode)->ns; | ||
343 | } | ||
344 | |||
340 | bool proc_ns_inode(struct inode *inode) | 345 | bool proc_ns_inode(struct inode *inode) |
341 | { | 346 | { |
342 | return inode->i_fop == &ns_file_operations; | 347 | return inode->i_fop == &ns_file_operations; |
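The namespace bits of proc_inode are now grouped in an embedded struct proc_ns, and the new get_proc_ns() gives callers outside fs/proc a way to reach it without PROC_I(). A hedged sketch of how setns()-style code might consume it, assuming it already holds a struct file for a /proc/&lt;pid&gt;/ns/* entry; the check_ns_file() wrapper is hypothetical:

	#include <linux/fs.h>
	#include <linux/proc_ns.h>

	static int check_ns_file(struct file *file)
	{
		struct proc_ns *pns;

		if (!proc_ns_inode(file_inode(file)))
			return -EINVAL;		/* not an ns file at all */

		pns = get_proc_ns(file_inode(file));
		/* real code would go on to use pns->ns_ops and pns->ns,
		 * e.g. to install the namespace; just sanity-check here */
		return (pns->ns_ops && pns->ns) ? 0 : -EINVAL;
	}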
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 30b590f5bd35..505afc950e0a 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c | |||
@@ -41,7 +41,7 @@ static int property_proc_show(struct seq_file *m, void *v) | |||
41 | 41 | ||
42 | static int property_proc_open(struct inode *inode, struct file *file) | 42 | static int property_proc_open(struct inode *inode, struct file *file) |
43 | { | 43 | { |
44 | return single_open(file, property_proc_show, PDE(inode)->data); | 44 | return single_open(file, property_proc_show, __PDE_DATA(inode)); |
45 | } | 45 | } |
46 | 46 | ||
47 | static const struct file_operations property_proc_fops = { | 47 | static const struct file_operations property_proc_fops = { |
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index b4ac6572474f..986e83220d56 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
@@ -26,6 +26,10 @@ | |||
26 | 26 | ||
27 | #include "internal.h" | 27 | #include "internal.h" |
28 | 28 | ||
29 | static inline struct net *PDE_NET(struct proc_dir_entry *pde) | ||
30 | { | ||
31 | return pde->parent->data; | ||
32 | } | ||
29 | 33 | ||
30 | static struct net *get_proc_net(const struct inode *inode) | 34 | static struct net *get_proc_net(const struct inode *inode) |
31 | { | 35 | { |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 9c7fab1d23f0..41a6ea93f486 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -141,6 +141,8 @@ static void proc_kill_sb(struct super_block *sb) | |||
141 | struct pid_namespace *ns; | 141 | struct pid_namespace *ns; |
142 | 142 | ||
143 | ns = (struct pid_namespace *)sb->s_fs_info; | 143 | ns = (struct pid_namespace *)sb->s_fs_info; |
144 | if (ns->proc_self) | ||
145 | dput(ns->proc_self); | ||
144 | kill_anon_super(sb); | 146 | kill_anon_super(sb); |
145 | put_pid_ns(ns); | 147 | put_pid_ns(ns); |
146 | } | 148 | } |
diff --git a/fs/proc/self.c b/fs/proc/self.c index aa5cc3bff140..6b6a993b5c25 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c | |||
@@ -1,6 +1,8 @@ | |||
1 | #include <linux/proc_fs.h> | ||
2 | #include <linux/sched.h> | 1 | #include <linux/sched.h> |
3 | #include <linux/namei.h> | 2 | #include <linux/namei.h> |
3 | #include <linux/slab.h> | ||
4 | #include <linux/pid_namespace.h> | ||
5 | #include "internal.h" | ||
4 | 6 | ||
5 | /* | 7 | /* |
6 | * /proc/self: | 8 | * /proc/self: |
@@ -48,12 +50,43 @@ static const struct inode_operations proc_self_inode_operations = { | |||
48 | .put_link = proc_self_put_link, | 50 | .put_link = proc_self_put_link, |
49 | }; | 51 | }; |
50 | 52 | ||
51 | void __init proc_self_init(void) | 53 | static unsigned self_inum; |
54 | |||
55 | int proc_setup_self(struct super_block *s) | ||
52 | { | 56 | { |
53 | struct proc_dir_entry *proc_self_symlink; | 57 | struct inode *root_inode = s->s_root->d_inode; |
54 | mode_t mode; | 58 | struct pid_namespace *ns = s->s_fs_info; |
59 | struct dentry *self; | ||
60 | |||
61 | mutex_lock(&root_inode->i_mutex); | ||
62 | self = d_alloc_name(s->s_root, "self"); | ||
63 | if (self) { | ||
64 | struct inode *inode = new_inode_pseudo(s); | ||
65 | if (inode) { | ||
66 | inode->i_ino = self_inum; | ||
67 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | ||
68 | inode->i_mode = S_IFLNK | S_IRWXUGO; | ||
69 | inode->i_uid = GLOBAL_ROOT_UID; | ||
70 | inode->i_gid = GLOBAL_ROOT_GID; | ||
71 | inode->i_op = &proc_self_inode_operations; | ||
72 | d_add(self, inode); | ||
73 | } else { | ||
74 | dput(self); | ||
75 | self = ERR_PTR(-ENOMEM); | ||
76 | } | ||
77 | } else { | ||
78 | self = ERR_PTR(-ENOMEM); | ||
79 | } | ||
80 | mutex_unlock(&root_inode->i_mutex); | ||
81 | if (IS_ERR(self)) { | ||
82 | pr_err("proc_fill_super: can't allocate /proc/self\n"); | ||
83 | return PTR_ERR(self); | ||
84 | } | ||
85 | ns->proc_self = self; | ||
86 | return 0; | ||
87 | } | ||
55 | 88 | ||
56 | mode = S_IFLNK | S_IRWXUGO; | 89 | void __init proc_self_init(void) |
57 | proc_self_symlink = proc_create("self", mode, NULL, NULL ); | 90 | { |
58 | proc_self_symlink->proc_iops = &proc_self_inode_operations; | 91 | proc_alloc_inum(&self_inum); |
59 | } | 92 | } |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index b870f740ab5a..17f7e080d7ff 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -8,7 +8,7 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
11 | #include <linux/proc_fs.h> | 11 | #include <linux/kcore.h> |
12 | #include <linux/user.h> | 12 | #include <linux/user.h> |
13 | #include <linux/elf.h> | 13 | #include <linux/elf.h> |
14 | #include <linux/elfcore.h> | 14 | #include <linux/elfcore.h> |
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/list.h> | 22 | #include <linux/list.h> |
23 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
24 | #include <asm/io.h> | 24 | #include <asm/io.h> |
25 | #include "internal.h" | ||
25 | 26 | ||
26 | /* List representing chunks of contiguous memory areas and their offsets in | 27 | /* List representing chunks of contiguous memory areas and their offsets in |
27 | * vmcore file. | 28 | * vmcore file. |
@@ -698,7 +699,7 @@ void vmcore_cleanup(void) | |||
698 | struct list_head *pos, *next; | 699 | struct list_head *pos, *next; |
699 | 700 | ||
700 | if (proc_vmcore) { | 701 | if (proc_vmcore) { |
701 | remove_proc_entry(proc_vmcore->name, proc_vmcore->parent); | 702 | proc_remove(proc_vmcore); |
702 | proc_vmcore = NULL; | 703 | proc_vmcore = NULL; |
703 | } | 704 | } |
704 | 705 | ||
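vmcore_cleanup() above switches from remove_proc_entry(name, parent) to proc_remove(), which takes the proc_dir_entry pointer the creator already holds; remove_proc_subtree(), used by the reiserfs conversion further down, is the by-name counterpart for tearing down a whole directory. A minimal sketch of both cleanup styles for a hypothetical driver; mydrv_fops and mydrv_data are placeholders:

	#include <linux/module.h>
	#include <linux/proc_fs.h>

	static const struct file_operations mydrv_fops;	/* assumed defined elsewhere */
	static int mydrv_data;					/* placeholder payload */
	static struct proc_dir_entry *mydrv_dir;

	static int __init mydrv_init(void)
	{
		mydrv_dir = proc_mkdir("mydrv", NULL);
		if (!mydrv_dir)
			return -ENOMEM;
		if (!proc_create_data("stats", 0444, mydrv_dir, &mydrv_fops, &mydrv_data)) {
			proc_remove(mydrv_dir);
			return -ENOMEM;
		}
		return 0;
	}

	static void __exit mydrv_exit(void)
	{
		proc_remove(mydrv_dir);		/* drops "stats" and the directory itself */
		/* equivalent by name: remove_proc_subtree("mydrv", NULL); */
	}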
diff --git a/fs/read_write.c b/fs/read_write.c index 8274a794253b..605dbbcb1973 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -459,6 +459,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ | |||
459 | ret = rw_verify_area(WRITE, file, pos, count); | 459 | ret = rw_verify_area(WRITE, file, pos, count); |
460 | if (ret >= 0) { | 460 | if (ret >= 0) { |
461 | count = ret; | 461 | count = ret; |
462 | file_start_write(file); | ||
462 | if (file->f_op->write) | 463 | if (file->f_op->write) |
463 | ret = file->f_op->write(file, buf, count, pos); | 464 | ret = file->f_op->write(file, buf, count, pos); |
464 | else | 465 | else |
@@ -468,6 +469,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ | |||
468 | add_wchar(current, ret); | 469 | add_wchar(current, ret); |
469 | } | 470 | } |
470 | inc_syscw(current); | 471 | inc_syscw(current); |
472 | file_end_write(file); | ||
471 | } | 473 | } |
472 | 474 | ||
473 | return ret; | 475 | return ret; |
@@ -576,7 +578,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) | |||
576 | } | 578 | } |
577 | EXPORT_SYMBOL(iov_shorten); | 579 | EXPORT_SYMBOL(iov_shorten); |
578 | 580 | ||
579 | ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | 581 | static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, |
580 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) | 582 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) |
581 | { | 583 | { |
582 | struct kiocb kiocb; | 584 | struct kiocb kiocb; |
@@ -601,7 +603,7 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | |||
601 | } | 603 | } |
602 | 604 | ||
603 | /* Do it by hand, with file-ops */ | 605 | /* Do it by hand, with file-ops */ |
604 | ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, | 606 | static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, |
605 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn) | 607 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn) |
606 | { | 608 | { |
607 | struct iovec *vector = iov; | 609 | struct iovec *vector = iov; |
@@ -743,6 +745,7 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
743 | } else { | 745 | } else { |
744 | fn = (io_fn_t)file->f_op->write; | 746 | fn = (io_fn_t)file->f_op->write; |
745 | fnv = file->f_op->aio_write; | 747 | fnv = file->f_op->aio_write; |
748 | file_start_write(file); | ||
746 | } | 749 | } |
747 | 750 | ||
748 | if (fnv) | 751 | if (fnv) |
@@ -751,6 +754,9 @@ static ssize_t do_readv_writev(int type, struct file *file, | |||
751 | else | 754 | else |
752 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); | 755 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); |
753 | 756 | ||
757 | if (type != READ) | ||
758 | file_end_write(file); | ||
759 | |||
754 | out: | 760 | out: |
755 | if (iov != iovstack) | 761 | if (iov != iovstack) |
756 | kfree(iov); | 762 | kfree(iov); |
@@ -881,6 +887,201 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, | |||
881 | return ret; | 887 | return ret; |
882 | } | 888 | } |
883 | 889 | ||
890 | #ifdef CONFIG_COMPAT | ||
891 | |||
892 | static ssize_t compat_do_readv_writev(int type, struct file *file, | ||
893 | const struct compat_iovec __user *uvector, | ||
894 | unsigned long nr_segs, loff_t *pos) | ||
895 | { | ||
896 | compat_ssize_t tot_len; | ||
897 | struct iovec iovstack[UIO_FASTIOV]; | ||
898 | struct iovec *iov = iovstack; | ||
899 | ssize_t ret; | ||
900 | io_fn_t fn; | ||
901 | iov_fn_t fnv; | ||
902 | |||
903 | ret = -EINVAL; | ||
904 | if (!file->f_op) | ||
905 | goto out; | ||
906 | |||
907 | ret = -EFAULT; | ||
908 | if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) | ||
909 | goto out; | ||
910 | |||
911 | ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, | ||
912 | UIO_FASTIOV, iovstack, &iov); | ||
913 | if (ret <= 0) | ||
914 | goto out; | ||
915 | |||
916 | tot_len = ret; | ||
917 | ret = rw_verify_area(type, file, pos, tot_len); | ||
918 | if (ret < 0) | ||
919 | goto out; | ||
920 | |||
921 | fnv = NULL; | ||
922 | if (type == READ) { | ||
923 | fn = file->f_op->read; | ||
924 | fnv = file->f_op->aio_read; | ||
925 | } else { | ||
926 | fn = (io_fn_t)file->f_op->write; | ||
927 | fnv = file->f_op->aio_write; | ||
928 | file_start_write(file); | ||
929 | } | ||
930 | |||
931 | if (fnv) | ||
932 | ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, | ||
933 | pos, fnv); | ||
934 | else | ||
935 | ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); | ||
936 | |||
937 | if (type != READ) | ||
938 | file_end_write(file); | ||
939 | |||
940 | out: | ||
941 | if (iov != iovstack) | ||
942 | kfree(iov); | ||
943 | if ((ret + (type == READ)) > 0) { | ||
944 | if (type == READ) | ||
945 | fsnotify_access(file); | ||
946 | else | ||
947 | fsnotify_modify(file); | ||
948 | } | ||
949 | return ret; | ||
950 | } | ||
951 | |||
952 | static ssize_t compat_readv(struct file *file, | ||
953 | const struct compat_iovec __user *vec, | ||
954 | unsigned long vlen, loff_t *pos) | ||
955 | { | ||
956 | ssize_t ret = -EBADF; | ||
957 | |||
958 | if (!(file->f_mode & FMODE_READ)) | ||
959 | goto out; | ||
960 | |||
961 | ret = -EINVAL; | ||
962 | if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) | ||
963 | goto out; | ||
964 | |||
965 | ret = compat_do_readv_writev(READ, file, vec, vlen, pos); | ||
966 | |||
967 | out: | ||
968 | if (ret > 0) | ||
969 | add_rchar(current, ret); | ||
970 | inc_syscr(current); | ||
971 | return ret; | ||
972 | } | ||
973 | |||
974 | COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd, | ||
975 | const struct compat_iovec __user *,vec, | ||
976 | unsigned long, vlen) | ||
977 | { | ||
978 | struct fd f = fdget(fd); | ||
979 | ssize_t ret; | ||
980 | loff_t pos; | ||
981 | |||
982 | if (!f.file) | ||
983 | return -EBADF; | ||
984 | pos = f.file->f_pos; | ||
985 | ret = compat_readv(f.file, vec, vlen, &pos); | ||
986 | f.file->f_pos = pos; | ||
987 | fdput(f); | ||
988 | return ret; | ||
989 | } | ||
990 | |||
991 | COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, | ||
992 | const struct compat_iovec __user *,vec, | ||
993 | unsigned long, vlen, loff_t, pos) | ||
994 | { | ||
995 | struct fd f; | ||
996 | ssize_t ret; | ||
997 | |||
998 | if (pos < 0) | ||
999 | return -EINVAL; | ||
1000 | f = fdget(fd); | ||
1001 | if (!f.file) | ||
1002 | return -EBADF; | ||
1003 | ret = -ESPIPE; | ||
1004 | if (f.file->f_mode & FMODE_PREAD) | ||
1005 | ret = compat_readv(f.file, vec, vlen, &pos); | ||
1006 | fdput(f); | ||
1007 | return ret; | ||
1008 | } | ||
1009 | |||
1010 | COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd, | ||
1011 | const struct compat_iovec __user *,vec, | ||
1012 | unsigned long, vlen, u32, pos_low, u32, pos_high) | ||
1013 | { | ||
1014 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1015 | return compat_sys_preadv64(fd, vec, vlen, pos); | ||
1016 | } | ||
1017 | |||
1018 | static ssize_t compat_writev(struct file *file, | ||
1019 | const struct compat_iovec __user *vec, | ||
1020 | unsigned long vlen, loff_t *pos) | ||
1021 | { | ||
1022 | ssize_t ret = -EBADF; | ||
1023 | |||
1024 | if (!(file->f_mode & FMODE_WRITE)) | ||
1025 | goto out; | ||
1026 | |||
1027 | ret = -EINVAL; | ||
1028 | if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) | ||
1029 | goto out; | ||
1030 | |||
1031 | ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); | ||
1032 | |||
1033 | out: | ||
1034 | if (ret > 0) | ||
1035 | add_wchar(current, ret); | ||
1036 | inc_syscw(current); | ||
1037 | return ret; | ||
1038 | } | ||
1039 | |||
1040 | COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd, | ||
1041 | const struct compat_iovec __user *, vec, | ||
1042 | unsigned long, vlen) | ||
1043 | { | ||
1044 | struct fd f = fdget(fd); | ||
1045 | ssize_t ret; | ||
1046 | loff_t pos; | ||
1047 | |||
1048 | if (!f.file) | ||
1049 | return -EBADF; | ||
1050 | pos = f.file->f_pos; | ||
1051 | ret = compat_writev(f.file, vec, vlen, &pos); | ||
1052 | f.file->f_pos = pos; | ||
1053 | fdput(f); | ||
1054 | return ret; | ||
1055 | } | ||
1056 | |||
1057 | COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, | ||
1058 | const struct compat_iovec __user *,vec, | ||
1059 | unsigned long, vlen, loff_t, pos) | ||
1060 | { | ||
1061 | struct fd f; | ||
1062 | ssize_t ret; | ||
1063 | |||
1064 | if (pos < 0) | ||
1065 | return -EINVAL; | ||
1066 | f = fdget(fd); | ||
1067 | if (!f.file) | ||
1068 | return -EBADF; | ||
1069 | ret = -ESPIPE; | ||
1070 | if (f.file->f_mode & FMODE_PWRITE) | ||
1071 | ret = compat_writev(f.file, vec, vlen, &pos); | ||
1072 | fdput(f); | ||
1073 | return ret; | ||
1074 | } | ||
1075 | |||
1076 | COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd, | ||
1077 | const struct compat_iovec __user *,vec, | ||
1078 | unsigned long, vlen, u32, pos_low, u32, pos_high) | ||
1079 | { | ||
1080 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | ||
1081 | return compat_sys_pwritev64(fd, vec, vlen, pos); | ||
1082 | } | ||
1083 | #endif | ||
1084 | |||
884 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | 1085 | static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, |
885 | size_t count, loff_t max) | 1086 | size_t count, loff_t max) |
886 | { | 1087 | { |
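Besides adding the compat readv/writev entry points, the hunk above moves filesystem freeze protection into the generic write paths via the file_start_write()/file_end_write() pair, which is why the splice and xfs hunks further down can drop their own sb_start_write()/sb_end_write() calls. A hedged sketch of the pattern, using a hypothetical helper that invokes ->write directly:

	#include <linux/fs.h>

	static ssize_t my_issue_write(struct file *file, const char __user *buf,
				      size_t count, loff_t *pos)
	{
		ssize_t ret;

		file_start_write(file);		/* waits while the superblock is frozen */
		ret = file->f_op->write(file, buf, count, pos);
		file_end_write(file);

		return ret;
	}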
diff --git a/fs/read_write.h b/fs/read_write.h index d07b954c6e0c..0ec530d9305b 100644 --- a/fs/read_write.h +++ b/fs/read_write.h | |||
@@ -7,8 +7,3 @@ | |||
7 | typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); | 7 | typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); |
8 | typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, | 8 | typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, |
9 | unsigned long, loff_t); | 9 | unsigned long, loff_t); |
10 | |||
11 | ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, | ||
12 | unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn); | ||
13 | ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, | ||
14 | unsigned long nr_segs, loff_t *ppos, io_fn_t fn); | ||
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 6165bd4784f6..dcaafcfc23b0 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -234,68 +234,9 @@ int reiserfs_commit_page(struct inode *inode, struct page *page, | |||
234 | return ret; | 234 | return ret; |
235 | } | 235 | } |
236 | 236 | ||
237 | /* Write @count bytes at position @ppos in a file indicated by @file | ||
238 | from the buffer @buf. | ||
239 | |||
240 | generic_file_write() is only appropriate for filesystems that are not seeking to optimize performance and want | ||
241 | something simple that works. It is not for serious use by general purpose filesystems, excepting the one that it was | ||
242 | written for (ext2/3). This is for several reasons: | ||
243 | |||
244 | * It has no understanding of any filesystem specific optimizations. | ||
245 | |||
246 | * It enters the filesystem repeatedly for each page that is written. | ||
247 | |||
248 | * It depends on reiserfs_get_block() function which if implemented by reiserfs performs costly search_by_key | ||
249 | * operation for each page it is supplied with. By contrast reiserfs_file_write() feeds as much as possible at a time | ||
250 | * to reiserfs which allows for fewer tree traversals. | ||
251 | |||
252 | * Each indirect pointer insertion takes a lot of cpu, because it involves memory moves inside of blocks. | ||
253 | |||
254 | * Asking the block allocation code for blocks one at a time is slightly less efficient. | ||
255 | |||
256 | All of these reasons for not using only generic file write were understood back when reiserfs was first miscoded to | ||
257 | use it, but we were in a hurry to make code freeze, and so it couldn't be revised then. This new code should make | ||
258 | things right finally. | ||
259 | |||
260 | Future Features: providing search_by_key with hints. | ||
261 | |||
262 | */ | ||
263 | static ssize_t reiserfs_file_write(struct file *file, /* the file we are going to write into */ | ||
264 | const char __user * buf, /* pointer to user supplied data | ||
265 | (in userspace) */ | ||
266 | size_t count, /* amount of bytes to write */ | ||
267 | loff_t * ppos /* pointer to position in file that we start writing at. Should be updated to | ||
268 | * new current position before returning. */ | ||
269 | ) | ||
270 | { | ||
271 | struct inode *inode = file_inode(file); // Inode of the file that we are writing to. | ||
272 | /* To simplify coding at this time, we store | ||
273 | locked pages in array for now */ | ||
274 | struct reiserfs_transaction_handle th; | ||
275 | th.t_trans_id = 0; | ||
276 | |||
277 | /* If a filesystem is converted from 3.5 to 3.6, we'll have v3.5 items | ||
278 | * lying around (most of the disk, in fact). Despite the filesystem | ||
279 | * now being a v3.6 format, the old items still can't support large | ||
280 | * file sizes. Catch this case here, as the rest of the VFS layer is | ||
281 | * oblivious to the different limitations between old and new items. | ||
282 | * reiserfs_setattr catches this for truncates. This chunk is lifted | ||
283 | * from generic_write_checks. */ | ||
284 | if (get_inode_item_key_version (inode) == KEY_FORMAT_3_5 && | ||
285 | *ppos + count > MAX_NON_LFS) { | ||
286 | if (*ppos >= MAX_NON_LFS) { | ||
287 | return -EFBIG; | ||
288 | } | ||
289 | if (count > MAX_NON_LFS - (unsigned long)*ppos) | ||
290 | count = MAX_NON_LFS - (unsigned long)*ppos; | ||
291 | } | ||
292 | |||
293 | return do_sync_write(file, buf, count, ppos); | ||
294 | } | ||
295 | |||
296 | const struct file_operations reiserfs_file_operations = { | 237 | const struct file_operations reiserfs_file_operations = { |
297 | .read = do_sync_read, | 238 | .read = do_sync_read, |
298 | .write = reiserfs_file_write, | 239 | .write = do_sync_write, |
299 | .unlocked_ioctl = reiserfs_ioctl, | 240 | .unlocked_ioctl = reiserfs_ioctl, |
300 | #ifdef CONFIG_COMPAT | 241 | #ifdef CONFIG_COMPAT |
301 | .compat_ioctl = reiserfs_compat_ioctl, | 242 | .compat_ioctl = reiserfs_compat_ioctl, |
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 9cc0740adffa..33532f79b4f7 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c | |||
@@ -394,20 +394,24 @@ static int set_sb(struct super_block *sb, void *data) | |||
394 | return -ENOENT; | 394 | return -ENOENT; |
395 | } | 395 | } |
396 | 396 | ||
397 | struct reiserfs_seq_private { | ||
398 | struct super_block *sb; | ||
399 | int (*show) (struct seq_file *, struct super_block *); | ||
400 | }; | ||
401 | |||
397 | static void *r_start(struct seq_file *m, loff_t * pos) | 402 | static void *r_start(struct seq_file *m, loff_t * pos) |
398 | { | 403 | { |
399 | struct proc_dir_entry *de = m->private; | 404 | struct reiserfs_seq_private *priv = m->private; |
400 | struct super_block *s = de->parent->data; | ||
401 | loff_t l = *pos; | 405 | loff_t l = *pos; |
402 | 406 | ||
403 | if (l) | 407 | if (l) |
404 | return NULL; | 408 | return NULL; |
405 | 409 | ||
406 | if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s))) | 410 | if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb))) |
407 | return NULL; | 411 | return NULL; |
408 | 412 | ||
409 | up_write(&s->s_umount); | 413 | up_write(&priv->sb->s_umount); |
410 | return s; | 414 | return priv->sb; |
411 | } | 415 | } |
412 | 416 | ||
413 | static void *r_next(struct seq_file *m, void *v, loff_t * pos) | 417 | static void *r_next(struct seq_file *m, void *v, loff_t * pos) |
@@ -426,9 +430,8 @@ static void r_stop(struct seq_file *m, void *v) | |||
426 | 430 | ||
427 | static int r_show(struct seq_file *m, void *v) | 431 | static int r_show(struct seq_file *m, void *v) |
428 | { | 432 | { |
429 | struct proc_dir_entry *de = m->private; | 433 | struct reiserfs_seq_private *priv = m->private; |
430 | int (*show) (struct seq_file *, struct super_block *) = de->data; | 434 | return priv->show(m, v); |
431 | return show(m, v); | ||
432 | } | 435 | } |
433 | 436 | ||
434 | static const struct seq_operations r_ops = { | 437 | static const struct seq_operations r_ops = { |
@@ -440,11 +443,15 @@ static const struct seq_operations r_ops = { | |||
440 | 443 | ||
441 | static int r_open(struct inode *inode, struct file *file) | 444 | static int r_open(struct inode *inode, struct file *file) |
442 | { | 445 | { |
443 | int ret = seq_open(file, &r_ops); | 446 | struct reiserfs_seq_private *priv; |
447 | int ret = seq_open_private(file, &r_ops, | ||
448 | sizeof(struct reiserfs_seq_private)); | ||
444 | 449 | ||
445 | if (!ret) { | 450 | if (!ret) { |
446 | struct seq_file *m = file->private_data; | 451 | struct seq_file *m = file->private_data; |
447 | m->private = PDE(inode); | 452 | priv = m->private; |
453 | priv->sb = proc_get_parent_data(inode); | ||
454 | priv->show = PDE_DATA(inode); | ||
448 | } | 455 | } |
449 | return ret; | 456 | return ret; |
450 | } | 457 | } |
@@ -453,7 +460,7 @@ static const struct file_operations r_file_operations = { | |||
453 | .open = r_open, | 460 | .open = r_open, |
454 | .read = seq_read, | 461 | .read = seq_read, |
455 | .llseek = seq_lseek, | 462 | .llseek = seq_lseek, |
456 | .release = seq_release, | 463 | .release = seq_release_private, |
457 | .owner = THIS_MODULE, | 464 | .owner = THIS_MODULE, |
458 | }; | 465 | }; |
459 | 466 | ||
@@ -479,9 +486,8 @@ int reiserfs_proc_info_init(struct super_block *sb) | |||
479 | *s = '!'; | 486 | *s = '!'; |
480 | 487 | ||
481 | spin_lock_init(&__PINFO(sb).lock); | 488 | spin_lock_init(&__PINFO(sb).lock); |
482 | REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root); | 489 | REISERFS_SB(sb)->procdir = proc_mkdir_data(b, 0, proc_info_root, sb); |
483 | if (REISERFS_SB(sb)->procdir) { | 490 | if (REISERFS_SB(sb)->procdir) { |
484 | REISERFS_SB(sb)->procdir->data = sb; | ||
485 | add_file(sb, "version", show_version); | 491 | add_file(sb, "version", show_version); |
486 | add_file(sb, "super", show_super); | 492 | add_file(sb, "super", show_super); |
487 | add_file(sb, "per-level", show_per_level); | 493 | add_file(sb, "per-level", show_per_level); |
@@ -499,29 +505,17 @@ int reiserfs_proc_info_init(struct super_block *sb) | |||
499 | int reiserfs_proc_info_done(struct super_block *sb) | 505 | int reiserfs_proc_info_done(struct super_block *sb) |
500 | { | 506 | { |
501 | struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; | 507 | struct proc_dir_entry *de = REISERFS_SB(sb)->procdir; |
502 | char b[BDEVNAME_SIZE]; | 508 | if (de) { |
503 | char *s; | 509 | char b[BDEVNAME_SIZE]; |
510 | char *s; | ||
504 | 511 | ||
505 | /* Some block devices use /'s */ | 512 | /* Some block devices use /'s */ |
506 | strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE); | 513 | strlcpy(b, reiserfs_bdevname(sb), BDEVNAME_SIZE); |
507 | s = strchr(b, '/'); | 514 | s = strchr(b, '/'); |
508 | if (s) | 515 | if (s) |
509 | *s = '!'; | 516 | *s = '!'; |
510 | 517 | ||
511 | if (de) { | 518 | remove_proc_subtree(b, proc_info_root); |
512 | remove_proc_entry("journal", de); | ||
513 | remove_proc_entry("oidmap", de); | ||
514 | remove_proc_entry("on-disk-super", de); | ||
515 | remove_proc_entry("bitmap", de); | ||
516 | remove_proc_entry("per-level", de); | ||
517 | remove_proc_entry("super", de); | ||
518 | remove_proc_entry("version", de); | ||
519 | } | ||
520 | spin_lock(&__PINFO(sb).lock); | ||
521 | __PINFO(sb).exiting = 1; | ||
522 | spin_unlock(&__PINFO(sb).lock); | ||
523 | if (proc_info_root) { | ||
524 | remove_proc_entry(b, proc_info_root); | ||
525 | REISERFS_SB(sb)->procdir = NULL; | 519 | REISERFS_SB(sb)->procdir = NULL; |
526 | } | 520 | } |
527 | return 0; | 521 | return 0; |
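The reiserfs conversion above relies on the seq_open_private()/seq_release_private() pairing so per-open state lives in a private allocation rather than in the proc_dir_entry, with the superblock recovered via proc_get_parent_data(). A generic sketch of that pattern; the my_* names are illustrative, not from reiserfs:

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	struct my_iter {
		struct super_block *sb;
	};

	static const struct seq_operations my_seq_ops;	/* ->start/->next/->stop/->show assumed elsewhere */

	static int my_open(struct inode *inode, struct file *file)
	{
		struct my_iter *priv;
		int ret;

		/* allocates and zeroes sizeof(*priv), stored in seq_file->private */
		ret = seq_open_private(file, &my_seq_ops, sizeof(*priv));
		if (ret)
			return ret;

		priv = ((struct seq_file *)file->private_data)->private;
		priv->sb = proc_get_parent_data(inode);	/* data stashed on the parent dir */
		return 0;
	}

	static const struct file_operations my_fops = {
		.open    = my_open,
		.read    = seq_read,
		.llseek  = seq_lseek,
		.release = seq_release_private,		/* frees the private allocation */
	};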
diff --git a/fs/seq_file.c b/fs/seq_file.c index 38bb59f3f2ad..774c1eb7f1c9 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -599,6 +599,24 @@ int single_open(struct file *file, int (*show)(struct seq_file *, void *), | |||
599 | } | 599 | } |
600 | EXPORT_SYMBOL(single_open); | 600 | EXPORT_SYMBOL(single_open); |
601 | 601 | ||
602 | int single_open_size(struct file *file, int (*show)(struct seq_file *, void *), | ||
603 | void *data, size_t size) | ||
604 | { | ||
605 | char *buf = kmalloc(size, GFP_KERNEL); | ||
606 | int ret; | ||
607 | if (!buf) | ||
608 | return -ENOMEM; | ||
609 | ret = single_open(file, show, data); | ||
610 | if (ret) { | ||
611 | kfree(buf); | ||
612 | return ret; | ||
613 | } | ||
614 | ((struct seq_file *)file->private_data)->buf = buf; | ||
615 | ((struct seq_file *)file->private_data)->size = size; | ||
616 | return 0; | ||
617 | } | ||
618 | EXPORT_SYMBOL(single_open_size); | ||
619 | |||
602 | int single_release(struct inode *inode, struct file *file) | 620 | int single_release(struct inode *inode, struct file *file) |
603 | { | 621 | { |
604 | const struct seq_operations *op = ((struct seq_file *)file->private_data)->op; | 622 | const struct seq_operations *op = ((struct seq_file *)file->private_data)->op; |
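The new single_open_size() above is for single-record files whose output can exceed the default one-page seq_file buffer: the caller preallocates a buffer sized for the worst case so ->show() does not have to be re-run after a buffer grow. A hedged usage sketch with a hypothetical show routine and size estimate:

	#include <linux/proc_fs.h>
	#include <linux/seq_file.h>

	static int big_table_show(struct seq_file *m, void *v)
	{
		int i;

		for (i = 0; i < 1024; i++)
			seq_printf(m, "row %d\n", i);
		return 0;
	}

	static int big_table_open(struct inode *inode, struct file *file)
	{
		/* worst-case output estimate; buffer is allocated up front */
		return single_open_size(file, big_table_show, PDE_DATA(inode),
					16 * 1024);
	}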
diff --git a/fs/splice.c b/fs/splice.c index 6b485b8753bd..e6b25598c8c4 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
@@ -219,7 +219,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
219 | page_nr++; | 219 | page_nr++; |
220 | ret += buf->len; | 220 | ret += buf->len; |
221 | 221 | ||
222 | if (pipe->inode) | 222 | if (pipe->files) |
223 | do_wakeup = 1; | 223 | do_wakeup = 1; |
224 | 224 | ||
225 | if (!--spd->nr_pages) | 225 | if (!--spd->nr_pages) |
@@ -829,7 +829,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, | |||
829 | ops->release(pipe, buf); | 829 | ops->release(pipe, buf); |
830 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); | 830 | pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); |
831 | pipe->nrbufs--; | 831 | pipe->nrbufs--; |
832 | if (pipe->inode) | 832 | if (pipe->files) |
833 | sd->need_wakeup = true; | 833 | sd->need_wakeup = true; |
834 | } | 834 | } |
835 | 835 | ||
@@ -1001,8 +1001,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1001 | }; | 1001 | }; |
1002 | ssize_t ret; | 1002 | ssize_t ret; |
1003 | 1003 | ||
1004 | sb_start_write(inode->i_sb); | ||
1005 | |||
1006 | pipe_lock(pipe); | 1004 | pipe_lock(pipe); |
1007 | 1005 | ||
1008 | splice_from_pipe_begin(&sd); | 1006 | splice_from_pipe_begin(&sd); |
@@ -1038,7 +1036,6 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, | |||
1038 | *ppos += ret; | 1036 | *ppos += ret; |
1039 | balance_dirty_pages_ratelimited(mapping); | 1037 | balance_dirty_pages_ratelimited(mapping); |
1040 | } | 1038 | } |
1041 | sb_end_write(inode->i_sb); | ||
1042 | 1039 | ||
1043 | return ret; | 1040 | return ret; |
1044 | } | 1041 | } |
@@ -1118,7 +1115,10 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, | |||
1118 | else | 1115 | else |
1119 | splice_write = default_file_splice_write; | 1116 | splice_write = default_file_splice_write; |
1120 | 1117 | ||
1121 | return splice_write(pipe, out, ppos, len, flags); | 1118 | file_start_write(out); |
1119 | ret = splice_write(pipe, out, ppos, len, flags); | ||
1120 | file_end_write(out); | ||
1121 | return ret; | ||
1122 | } | 1122 | } |
1123 | 1123 | ||
1124 | /* | 1124 | /* |
@@ -1184,7 +1184,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, | |||
1184 | */ | 1184 | */ |
1185 | pipe = current->splice_pipe; | 1185 | pipe = current->splice_pipe; |
1186 | if (unlikely(!pipe)) { | 1186 | if (unlikely(!pipe)) { |
1187 | pipe = alloc_pipe_info(NULL); | 1187 | pipe = alloc_pipe_info(); |
1188 | if (!pipe) | 1188 | if (!pipe) |
1189 | return -ENOMEM; | 1189 | return -ENOMEM; |
1190 | 1190 | ||
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index f03bf1a456fb..3800128d2171 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -775,8 +775,6 @@ xfs_file_aio_write( | |||
775 | if (ocount == 0) | 775 | if (ocount == 0) |
776 | return 0; | 776 | return 0; |
777 | 777 | ||
778 | sb_start_write(inode->i_sb); | ||
779 | |||
780 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 778 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
781 | ret = -EIO; | 779 | ret = -EIO; |
782 | goto out; | 780 | goto out; |
@@ -800,7 +798,6 @@ xfs_file_aio_write( | |||
800 | } | 798 | } |
801 | 799 | ||
802 | out: | 800 | out: |
803 | sb_end_write(inode->i_sb); | ||
804 | return ret; | 801 | return ret; |
805 | } | 802 | } |
806 | 803 | ||