Diffstat (limited to 'fs')
-rw-r--r--  fs/afs/proc.c                 1
-rw-r--r--  fs/autofs4/autofs_i.h         2
-rw-r--r--  fs/autofs4/dev-ioctl.c       29
-rw-r--r--  fs/autofs4/expire.c          27
-rw-r--r--  fs/autofs4/root.c            41
-rw-r--r--  fs/btrfs/ctree.h              2
-rw-r--r--  fs/btrfs/inode.c             12
-rw-r--r--  fs/buffer.c                  56
-rw-r--r--  fs/cifs/cifs_debug.c          1
-rw-r--r--  fs/ecryptfs/keystore.c        3
-rw-r--r--  fs/ecryptfs/messaging.c       3
-rw-r--r--  fs/eventfd.c                 26
-rw-r--r--  fs/eventpoll.c              614
-rw-r--r--  fs/ext4/ext4.h                2
-rw-r--r--  fs/ext4/inode.c               5
-rw-r--r--  fs/fuse/file.c                3
-rw-r--r--  fs/gfs2/ops_file.c            5
-rw-r--r--  fs/hugetlbfs/inode.c         21
-rw-r--r--  fs/jfs/jfs_debug.c            1
-rw-r--r--  fs/nfs/client.c               2
-rw-r--r--  fs/nfs/file.c                 5
-rw-r--r--  fs/ntfs/dir.c                 4
-rw-r--r--  fs/ntfs/inode.c               3
-rw-r--r--  fs/ntfs/layout.h            329
-rw-r--r--  fs/ntfs/logfile.h             6
-rw-r--r--  fs/ntfs/mft.c                 2
-rw-r--r--  fs/ntfs/super.c              50
-rw-r--r--  fs/ntfs/usnjrnl.h            48
-rw-r--r--  fs/ocfs2/mmap.c               6
-rw-r--r--  fs/proc/generic.c            63
-rw-r--r--  fs/proc/inode-alloc.txt      14
-rw-r--r--  fs/proc/inode.c              21
-rw-r--r--  fs/proc/internal.h            1
-rw-r--r--  fs/proc/proc_tty.c           13
-rw-r--r--  fs/proc/task_mmu.c            8
-rw-r--r--  fs/proc/uptime.c             38
-rw-r--r--  fs/ramfs/file-nommu.c        15
-rw-r--r--  fs/ramfs/inode.c             94
-rw-r--r--  fs/reiserfs/procfs.c          5
-rw-r--r--  fs/reiserfs/xattr.c           2
-rw-r--r--  fs/seq_file.c                 2
-rw-r--r--  fs/sysfs/bin.c                8
-rw-r--r--  fs/ubifs/file.c               9
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c   4
44 files changed, 887 insertions(+), 719 deletions(-)
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 7578c1ab9e0b..8630615e57fe 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -146,7 +146,6 @@ int afs_proc_init(void)
 	proc_afs = proc_mkdir("fs/afs", NULL);
 	if (!proc_afs)
 		goto error_dir;
-	proc_afs->owner = THIS_MODULE;
 
 	p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
 	if (!p)
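
The one-line ->owner deletion above recurs throughout this series (see
fs/cifs/cifs_debug.c below, plus the jfs, reiserfs and fs/proc entries in
the diffstat): the owner field of struct proc_dir_entry is going away, so
initialization code simply stops assigning it. A minimal sketch of the
resulting registration pattern; "example_dir" and "example_fops" are
placeholders, not names from the patch:

	static const struct file_operations example_fops; /* placeholder */
	static struct proc_dir_entry *example_dir;

	static int __init example_proc_init(void)
	{
		example_dir = proc_mkdir("fs/example", NULL);
		if (!example_dir)
			return -ENOMEM;
		/* previously: example_dir->owner = THIS_MODULE; */
		if (!proc_create("stats", 0, example_dir, &example_fops))
			return -ENOMEM;
		return 0;
	}
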
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index a76803108d06..b7ff33c63101 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -186,6 +186,8 @@ int autofs4_expire_wait(struct dentry *dentry);
 int autofs4_expire_run(struct super_block *, struct vfsmount *,
 			struct autofs_sb_info *,
 			struct autofs_packet_expire __user *);
+int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
+			    struct autofs_sb_info *sbi, int when);
 int autofs4_expire_multi(struct super_block *, struct vfsmount *,
 			 struct autofs_sb_info *, int __user *);
 struct dentry *autofs4_expire_direct(struct super_block *sb,
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 025e105bffea..9e5ae8a4f5c8 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -525,40 +525,13 @@ static int autofs_dev_ioctl_expire(struct file *fp,
 				   struct autofs_sb_info *sbi,
 				   struct autofs_dev_ioctl *param)
 {
-	struct dentry *dentry;
 	struct vfsmount *mnt;
-	int err = -EAGAIN;
 	int how;
 
 	how = param->expire.how;
 	mnt = fp->f_path.mnt;
 
-	if (autofs_type_trigger(sbi->type))
-		dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how);
-	else
-		dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how);
-
-	if (dentry) {
-		struct autofs_info *ino = autofs4_dentry_ino(dentry);
-
-		/*
-		 * This is synchronous because it makes the daemon a
-		 * little easier
-		 */
-		err = autofs4_wait(sbi, dentry, NFY_EXPIRE);
-
-		spin_lock(&sbi->fs_lock);
-		if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
-			ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
-			sbi->sb->s_root->d_mounted++;
-		}
-		ino->flags &= ~AUTOFS_INF_EXPIRING;
-		complete_all(&ino->expire_complete);
-		spin_unlock(&sbi->fs_lock);
-		dput(dentry);
-	}
-
-	return err;
+	return autofs4_do_expire_multi(sbi->sb, mnt, sbi, how);
 }
 
 /* Check if autofs mount point is in use */
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index e3bd50776f9e..75f7ddacf7d6 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -478,22 +478,16 @@ int autofs4_expire_run(struct super_block *sb,
 	return ret;
 }
 
-/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
-   more to be done */
-int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
-			 struct autofs_sb_info *sbi, int __user *arg)
+int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
+			    struct autofs_sb_info *sbi, int when)
 {
 	struct dentry *dentry;
 	int ret = -EAGAIN;
-	int do_now = 0;
-
-	if (arg && get_user(do_now, arg))
-		return -EFAULT;
 
 	if (autofs_type_trigger(sbi->type))
-		dentry = autofs4_expire_direct(sb, mnt, sbi, do_now);
+		dentry = autofs4_expire_direct(sb, mnt, sbi, when);
 	else
-		dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now);
+		dentry = autofs4_expire_indirect(sb, mnt, sbi, when);
 
 	if (dentry) {
 		struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -516,3 +510,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	return ret;
 }
 
+/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
+   more to be done */
+int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
+			 struct autofs_sb_info *sbi, int __user *arg)
+{
+	int do_now = 0;
+
+	if (arg && get_user(do_now, arg))
+		return -EFAULT;
+
+	return autofs4_do_expire_multi(sb, mnt, sbi, do_now);
+}
+
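
The comment retained on autofs4_expire_multi() spells out the userspace
contract: the daemon keeps calling until -EAGAIN says nothing is left to
expire. A hedged sketch of such a daemon-side loop; the ioctl fd and the
do_now value are assumptions for illustration only:

	#include <sys/ioctl.h>
	#include <linux/auto_fs4.h>

	/* Each successful AUTOFS_IOC_EXPIRE_MULTI call expires one
	 * candidate; any error (typically -EAGAIN) ends the pass. */
	static void expire_pass(int ioctlfd)
	{
		int do_now = 1;

		while (ioctl(ioctlfd, AUTOFS_IOC_EXPIRE_MULTI, &do_now) == 0)
			;
	}
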
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 74b1469a9504..e383bf0334f1 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -485,22 +485,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
 		 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
 
-	expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
-	if (expiring) {
-		/*
-		 * If we are racing with expire the request might not
-		 * be quite complete but the directory has been removed
-		 * so it must have been successful, so just wait for it.
-		 */
-		ino = autofs4_dentry_ino(expiring);
-		autofs4_expire_wait(expiring);
-		spin_lock(&sbi->lookup_lock);
-		if (!list_empty(&ino->expiring))
-			list_del_init(&ino->expiring);
-		spin_unlock(&sbi->lookup_lock);
-		dput(expiring);
-	}
-
 	unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
 	if (unhashed)
 		dentry = unhashed;
@@ -538,14 +522,31 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
 	}
 
 	if (!oz_mode) {
+		mutex_unlock(&dir->i_mutex);
+		expiring = autofs4_lookup_expiring(sbi,
+						   dentry->d_parent,
+						   &dentry->d_name);
+		if (expiring) {
+			/*
+			 * If we are racing with expire the request might not
+			 * be quite complete but the directory has been removed
+			 * so it must have been successful, so just wait for it.
+			 */
+			ino = autofs4_dentry_ino(expiring);
+			autofs4_expire_wait(expiring);
+			spin_lock(&sbi->lookup_lock);
+			if (!list_empty(&ino->expiring))
+				list_del_init(&ino->expiring);
+			spin_unlock(&sbi->lookup_lock);
+			dput(expiring);
+		}
+
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags |= DCACHE_AUTOFS_PENDING;
 		spin_unlock(&dentry->d_lock);
-		if (dentry->d_op && dentry->d_op->d_revalidate) {
-			mutex_unlock(&dir->i_mutex);
+		if (dentry->d_op && dentry->d_op->d_revalidate)
 			(dentry->d_op->d_revalidate)(dentry, nd);
-			mutex_lock(&dir->i_mutex);
-		}
+		mutex_lock(&dir->i_mutex);
 	}
 
 	/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5e1d4e30e9d8..7dd1b6d0bf32 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2060,7 +2060,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 unsigned long btrfs_force_ra(struct address_space *mapping,
 			      struct file_ra_state *ra, struct file *file,
 			      pgoff_t offset, pgoff_t last_index);
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 7d4f948bc22a..17e608c4dc70 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4292,8 +4292,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
  * beyond EOF, then the page is guaranteed safe against truncation until we
  * unlock the page.
  */
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+	struct page *page = vmf->page;
 	struct inode *inode = fdentry(vma->vm_file)->d_inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
@@ -4306,10 +4307,15 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	u64 page_end;
 
 	ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
-	if (ret)
+	if (ret) {
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else /* -ENOSPC, -EIO, etc */
+			ret = VM_FAULT_SIGBUS;
 		goto out;
+	}
 
-	ret = -EINVAL;
+	ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
 again:
 	lock_page(page);
 	size = i_size_read(inode);
diff --git a/fs/buffer.c b/fs/buffer.c
index a2fd743d97cb..f5f8b15a6e40 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -290,7 +290,7 @@ static void free_more_memory(void)
 						&zone);
 		if (zone)
 			try_to_free_pages(node_zonelist(nid, GFP_NOFS), 0,
-						GFP_NOFS);
+						GFP_NOFS, NULL);
 	}
 }
 
@@ -547,6 +547,39 @@ repeat:
 	return err;
 }
 
+void do_thaw_all(unsigned long unused)
+{
+	struct super_block *sb;
+	char b[BDEVNAME_SIZE];
+
+	spin_lock(&sb_lock);
+restart:
+	list_for_each_entry(sb, &super_blocks, s_list) {
+		sb->s_count++;
+		spin_unlock(&sb_lock);
+		down_read(&sb->s_umount);
+		while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb))
+			printk(KERN_WARNING "Emergency Thaw on %s\n",
+			       bdevname(sb->s_bdev, b));
+		up_read(&sb->s_umount);
+		spin_lock(&sb_lock);
+		if (__put_super_and_need_restart(sb))
+			goto restart;
+	}
+	spin_unlock(&sb_lock);
+	printk(KERN_WARNING "Emergency Thaw complete\n");
+}
+
+/**
+ * emergency_thaw_all -- forcibly thaw every frozen filesystem
+ *
+ * Used for emergency unfreeze of all filesystems via SysRq
+ */
+void emergency_thaw_all(void)
+{
+	pdflush_operation(do_thaw_all, 0);
+}
+
 /**
  * sync_mapping_buffers - write out & wait upon a mapping's "associated" buffers
  * @mapping: the mapping which wants those buffers written
@@ -621,14 +654,7 @@ static void __set_page_dirty(struct page *page,
 	spin_lock_irq(&mapping->tree_lock);
 	if (page->mapping) {	/* Race with truncate? */
 		WARN_ON_ONCE(warn && !PageUptodate(page));
-
-		if (mapping_cap_account_dirty(mapping)) {
-			__inc_zone_page_state(page, NR_FILE_DIRTY);
-			__inc_bdi_stat(mapping->backing_dev_info,
-					BDI_RECLAIMABLE);
-			task_dirty_inc(current);
-			task_io_account_write(PAGE_CACHE_SIZE);
-		}
+		account_page_dirtied(page, mapping);
 		radix_tree_tag_set(&mapping->page_tree,
 				page_index(page), PAGECACHE_TAG_DIRTY);
 	}
@@ -2320,13 +2346,14 @@ int block_commit_write(struct page *page, unsigned from, unsigned to)
  * unlock the page.
  */
 int
-block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 		   get_block_t get_block)
 {
+	struct page *page = vmf->page;
 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
 	unsigned long end;
 	loff_t size;
-	int ret = -EINVAL;
+	int ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
 
 	lock_page(page);
 	size = i_size_read(inode);
@@ -2346,6 +2373,13 @@ block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
+	if (unlikely(ret)) {
+		if (ret == -ENOMEM)
+			ret = VM_FAULT_OOM;
+		else /* -ENOSPC, -EIO, etc */
+			ret = VM_FAULT_SIGBUS;
+	}
+
 out_unlock:
 	unlock_page(page);
 	return ret;
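
The new emergency_thaw_all() defers the real work to a pdflush thread via
pdflush_operation(), because walking super_blocks and taking s_umount is
not something an emergency (SysRq) context can do directly. The trigger
itself lives outside fs/ and is not in this diffstat; a hypothetical hook,
assuming the sysrq_key_op API of this kernel era, could look like:

	static void sysrq_handle_thaw(int key, struct tty_struct *tty)
	{
		emergency_thaw_all();
	}

	static struct sysrq_key_op sysrq_thaw_op = {
		.handler	= sysrq_handle_thaw,
		.help_msg	= "thaw-filesystems(J)",
		.action_msg	= "Emergency Thaw of all frozen filesystems",
		.enable_mask	= SYSRQ_ENABLE_SIGNAL,	/* assumption */
	};
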
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 877e4d9a1159..7f19fefd3d45 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -404,7 +404,6 @@ cifs_proc_init(void)
 	if (proc_fs_cifs == NULL)
 		return;
 
-	proc_fs_cifs->owner = THIS_MODULE;
 	proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
 
 #ifdef CONFIG_CIFS_STATS
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e4a6223c3145..af737bb56cb7 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -740,8 +740,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
 out_release_free_unlock:
 	crypto_free_hash(s->hash_desc.tfm);
 out_free_unlock:
-	memset(s->block_aligned_filename, 0, s->block_aligned_filename_size);
-	kfree(s->block_aligned_filename);
+	kzfree(s->block_aligned_filename);
 out_unlock:
 	mutex_unlock(s->tfm_mutex);
 out:
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index 96ef51489e01..295e7fa56755 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -291,8 +291,7 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon)
 	if (daemon->user_ns)
 		put_user_ns(daemon->user_ns);
 	mutex_unlock(&daemon->mux);
-	memset(daemon, 0, sizeof(*daemon));
-	kfree(daemon);
+	kzfree(daemon);
 out:
 	return rc;
 }
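
Both eCryptfs hunks replace an open-coded memset()-plus-kfree() of
sensitive buffers with kzfree(). One detail worth knowing: kzfree() clears
the whole slab object as reported by ksize(), which can be larger than the
size originally requested. Roughly, it behaves like this sketch:

	static void kzfree_equivalent(const void *p)
	{
		size_t ks;

		if (unlikely(ZERO_OR_NULL_PTR(p)))
			return;
		ks = ksize(p);	/* full object size, >= requested size */
		memset((void *)p, 0, ks);
		kfree(p);
	}
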
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 5de2c2db3aa2..2a701d593d35 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -28,6 +28,7 @@ struct eventfd_ctx {
 	 * issue a wakeup.
 	 */
 	__u64 count;
+	unsigned int flags;
 };
 
 /*
@@ -50,7 +51,7 @@ int eventfd_signal(struct file *file, int n)
 		n = (int) (ULLONG_MAX - ctx->count);
 	ctx->count += n;
 	if (waitqueue_active(&ctx->wqh))
-		wake_up_locked(&ctx->wqh);
+		wake_up_locked_poll(&ctx->wqh, POLLIN);
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 
 	return n;
@@ -87,22 +88,20 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 {
 	struct eventfd_ctx *ctx = file->private_data;
 	ssize_t res;
-	__u64 ucnt;
+	__u64 ucnt = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
 	if (count < sizeof(ucnt))
 		return -EINVAL;
 	spin_lock_irq(&ctx->wqh.lock);
 	res = -EAGAIN;
-	ucnt = ctx->count;
-	if (ucnt > 0)
+	if (ctx->count > 0)
 		res = sizeof(ucnt);
 	else if (!(file->f_flags & O_NONBLOCK)) {
 		__add_wait_queue(&ctx->wqh, &wait);
 		for (res = 0;;) {
 			set_current_state(TASK_INTERRUPTIBLE);
 			if (ctx->count > 0) {
-				ucnt = ctx->count;
 				res = sizeof(ucnt);
 				break;
 			}
@@ -117,10 +116,11 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
 		__remove_wait_queue(&ctx->wqh, &wait);
 		__set_current_state(TASK_RUNNING);
 	}
-	if (res > 0) {
-		ctx->count = 0;
+	if (likely(res > 0)) {
+		ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+		ctx->count -= ucnt;
 		if (waitqueue_active(&ctx->wqh))
-			wake_up_locked(&ctx->wqh);
+			wake_up_locked_poll(&ctx->wqh, POLLOUT);
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
 	if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
@@ -166,10 +166,10 @@ static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t c
 		__remove_wait_queue(&ctx->wqh, &wait);
 		__set_current_state(TASK_RUNNING);
 	}
-	if (res > 0) {
+	if (likely(res > 0)) {
 		ctx->count += ucnt;
 		if (waitqueue_active(&ctx->wqh))
-			wake_up_locked(&ctx->wqh);
+			wake_up_locked_poll(&ctx->wqh, POLLIN);
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
 
@@ -207,7 +207,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 	BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
 	BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
 
-	if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
+	if (flags & ~EFD_FLAGS_SET)
 		return -EINVAL;
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
@@ -216,13 +216,14 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
 
 	init_waitqueue_head(&ctx->wqh);
 	ctx->count = count;
+	ctx->flags = flags;
 
 	/*
 	 * When we call this, the initialization must be complete, since
 	 * anon_inode_getfd() will install the fd.
 	 */
 	fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
-			      flags & (O_CLOEXEC | O_NONBLOCK));
+			      flags & EFD_SHARED_FCNTL_FLAGS);
 	if (fd < 0)
 		kfree(ctx);
 	return fd;
@@ -232,3 +233,4 @@ SYSCALL_DEFINE1(eventfd, unsigned int, count)
 {
 	return sys_eventfd2(count, 0);
 }
+
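
The new ctx->flags field carries EFD_SEMAPHORE, introduced by this series:
with the flag set, each read() consumes a single unit and returns 1 rather
than draining the whole counter. A small userspace illustration of those
semantics, assuming a libc that exposes EFD_SEMAPHORE:

	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/eventfd.h>

	int main(void)
	{
		uint64_t post = 3, got;
		int fd = eventfd(0, EFD_SEMAPHORE);

		if (fd < 0)
			return 1;
		write(fd, &post, sizeof(post));		/* counter = 3 */
		for (int i = 0; i < 3; i++) {
			read(fd, &got, sizeof(got));	/* got == 1 each time */
			printf("read %llu\n", (unsigned long long)got);
		}
		close(fd);
		return 0;
	}
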
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index c5c424f23fd5..a89f370fadb5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1,6 +1,6 @@
 /*
- *  fs/eventpoll.c (Efficent event polling implementation)
- *  Copyright (C) 2001,...,2007	 Davide Libenzi
+ *  fs/eventpoll.c (Efficient event retrieval implementation)
+ *  Copyright (C) 2001,...,2009	 Davide Libenzi
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -71,29 +71,11 @@
  * a better scalability.
  */
 
-#define DEBUG_EPOLL 0
-
-#if DEBUG_EPOLL > 0
-#define DPRINTK(x) printk x
-#define DNPRINTK(n, x) do { if ((n) <= DEBUG_EPOLL) printk x; } while (0)
-#else /* #if DEBUG_EPOLL > 0 */
-#define DPRINTK(x) (void) 0
-#define DNPRINTK(n, x) (void) 0
-#endif /* #if DEBUG_EPOLL > 0 */
-
-#define DEBUG_EPI 0
-
-#if DEBUG_EPI != 0
-#define EPI_SLAB_DEBUG (SLAB_DEBUG_FREE | SLAB_RED_ZONE /* | SLAB_POISON */)
-#else /* #if DEBUG_EPI != 0 */
-#define EPI_SLAB_DEBUG 0
-#endif /* #if DEBUG_EPI != 0 */
-
 /* Epoll private bits inside the event mask */
 #define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
 
-/* Maximum number of poll wake up nests we are allowing */
-#define EP_MAX_POLLWAKE_NESTS 4
+/* Maximum number of nesting allowed inside epoll sets */
+#define EP_MAX_NESTS 4
 
 /* Maximum msec timeout value storeable in a long int */
 #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
@@ -110,24 +92,21 @@ struct epoll_filefd {
 };
 
 /*
- * Node that is linked into the "wake_task_list" member of the "struct poll_safewake".
- * It is used to keep track on all tasks that are currently inside the wake_up() code
- * to 1) short-circuit the one coming from the same task and same wait queue head
- * (loop) 2) allow a maximum number of epoll descriptors inclusion nesting
- * 3) let go the ones coming from other tasks.
+ * Structure used to track possible nested calls, for too deep recursions
+ * and loop cycles.
  */
-struct wake_task_node {
+struct nested_call_node {
 	struct list_head llink;
-	struct task_struct *task;
-	wait_queue_head_t *wq;
+	void *cookie;
+	int cpu;
 };
 
 /*
- * This is used to implement the safe poll wake up avoiding to reenter
- * the poll callback from inside wake_up().
+ * This structure is used as collector for nested calls, to check for
+ * maximum recursion dept and loop cycles.
  */
-struct poll_safewake {
-	struct list_head wake_task_list;
+struct nested_calls {
+	struct list_head tasks_call_list;
 	spinlock_t lock;
 };
 
@@ -213,7 +192,7 @@ struct eppoll_entry {
 	struct list_head llink;
 
 	/* The "base" pointer is set to the container "struct epitem" */
-	void *base;
+	struct epitem *base;
 
 	/*
 	 * Wait queue item that will be linked to the target file wait
@@ -231,6 +210,12 @@ struct ep_pqueue {
 	struct epitem *epi;
 };
 
+/* Used by the ep_send_events() function as callback private data */
+struct ep_send_events_data {
+	int maxevents;
+	struct epoll_event __user *events;
+};
+
 /*
  * Configuration options available inside /proc/sys/fs/epoll/
  */
@@ -242,8 +227,11 @@ static int max_user_watches __read_mostly;
  */
 static DEFINE_MUTEX(epmutex);
 
-/* Safe wake up implementation */
-static struct poll_safewake psw;
+/* Used for safe wake up implementation */
+static struct nested_calls poll_safewake_ncalls;
+
+/* Used to call file's f_op->poll() under the nested calls boundaries */
+static struct nested_calls poll_readywalk_ncalls;
 
 /* Slab cache used to allocate "struct epitem" */
 static struct kmem_cache *epi_cache __read_mostly;
@@ -312,89 +300,230 @@ static inline int ep_op_has_event(int op)
 }
 
 /* Initialize the poll safe wake up structure */
-static void ep_poll_safewake_init(struct poll_safewake *psw)
+static void ep_nested_calls_init(struct nested_calls *ncalls)
 {
-
-	INIT_LIST_HEAD(&psw->wake_task_list);
-	spin_lock_init(&psw->lock);
+	INIT_LIST_HEAD(&ncalls->tasks_call_list);
+	spin_lock_init(&ncalls->lock);
 }
 
-/*
- * Perform a safe wake up of the poll wait list. The problem is that
- * with the new callback'd wake up system, it is possible that the
- * poll callback is reentered from inside the call to wake_up() done
- * on the poll wait queue head. The rule is that we cannot reenter the
- * wake up code from the same task more than EP_MAX_POLLWAKE_NESTS times,
- * and we cannot reenter the same wait queue head at all. This will
- * enable to have a hierarchy of epoll file descriptor of no more than
- * EP_MAX_POLLWAKE_NESTS deep. We need the irq version of the spin lock
- * because this one gets called by the poll callback, that in turn is called
- * from inside a wake_up(), that might be called from irq context.
+/**
+ * ep_call_nested - Perform a bound (possibly) nested call, by checking
+ *                  that the recursion limit is not exceeded, and that
+ *                  the same nested call (by the meaning of same cookie) is
+ *                  no re-entered.
+ *
+ * @ncalls: Pointer to the nested_calls structure to be used for this call.
+ * @max_nests: Maximum number of allowed nesting calls.
+ * @nproc: Nested call core function pointer.
+ * @priv: Opaque data to be passed to the @nproc callback.
+ * @cookie: Cookie to be used to identify this nested call.
+ *
+ * Returns: Returns the code returned by the @nproc callback, or -1 if
+ *          the maximum recursion limit has been exceeded.
  */
-static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq)
+static int ep_call_nested(struct nested_calls *ncalls, int max_nests,
+			  int (*nproc)(void *, void *, int), void *priv,
+			  void *cookie)
 {
-	int wake_nests = 0;
+	int error, call_nests = 0;
 	unsigned long flags;
-	struct task_struct *this_task = current;
-	struct list_head *lsthead = &psw->wake_task_list;
-	struct wake_task_node *tncur;
-	struct wake_task_node tnode;
+	int this_cpu = get_cpu();
+	struct list_head *lsthead = &ncalls->tasks_call_list;
+	struct nested_call_node *tncur;
+	struct nested_call_node tnode;
 
-	spin_lock_irqsave(&psw->lock, flags);
+	spin_lock_irqsave(&ncalls->lock, flags);
 
-	/* Try to see if the current task is already inside this wakeup call */
+	/*
+	 * Try to see if the current task is already inside this wakeup call.
+	 * We use a list here, since the population inside this set is always
+	 * very much limited.
+	 */
 	list_for_each_entry(tncur, lsthead, llink) {
-
-		if (tncur->wq == wq ||
-		    (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) {
+		if (tncur->cpu == this_cpu &&
+		    (tncur->cookie == cookie || ++call_nests > max_nests)) {
 			/*
 			 * Ops ... loop detected or maximum nest level reached.
 			 * We abort this wake by breaking the cycle itself.
 			 */
-			spin_unlock_irqrestore(&psw->lock, flags);
-			return;
+			error = -1;
+			goto out_unlock;
 		}
 	}
 
-	/* Add the current task to the list */
-	tnode.task = this_task;
-	tnode.wq = wq;
+	/* Add the current task and cookie to the list */
+	tnode.cpu = this_cpu;
+	tnode.cookie = cookie;
 	list_add(&tnode.llink, lsthead);
 
-	spin_unlock_irqrestore(&psw->lock, flags);
+	spin_unlock_irqrestore(&ncalls->lock, flags);
 
-	/* Do really wake up now */
-	wake_up_nested(wq, 1 + wake_nests);
+	/* Call the nested function */
+	error = (*nproc)(priv, cookie, call_nests);
 
 	/* Remove the current task from the list */
-	spin_lock_irqsave(&psw->lock, flags);
+	spin_lock_irqsave(&ncalls->lock, flags);
 	list_del(&tnode.llink);
-	spin_unlock_irqrestore(&psw->lock, flags);
+out_unlock:
+	spin_unlock_irqrestore(&ncalls->lock, flags);
+
+	put_cpu();
+	return error;
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
+				     unsigned long events, int subclass)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave_nested(&wqueue->lock, flags, subclass);
+	wake_up_locked_poll(wqueue, events);
+	spin_unlock_irqrestore(&wqueue->lock, flags);
+}
+#else
+static inline void ep_wake_up_nested(wait_queue_head_t *wqueue,
+				     unsigned long events, int subclass)
+{
+	wake_up_poll(wqueue, events);
+}
+#endif
+
+static int ep_poll_wakeup_proc(void *priv, void *cookie, int call_nests)
+{
+	ep_wake_up_nested((wait_queue_head_t *) cookie, POLLIN,
+			  1 + call_nests);
+	return 0;
+}
+
+/*
+ * Perform a safe wake up of the poll wait list. The problem is that
+ * with the new callback'd wake up system, it is possible that the
+ * poll callback is reentered from inside the call to wake_up() done
+ * on the poll wait queue head. The rule is that we cannot reenter the
+ * wake up code from the same task more than EP_MAX_NESTS times,
+ * and we cannot reenter the same wait queue head at all. This will
+ * enable to have a hierarchy of epoll file descriptor of no more than
+ * EP_MAX_NESTS deep.
+ */
+static void ep_poll_safewake(wait_queue_head_t *wq)
+{
+	ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS,
+		       ep_poll_wakeup_proc, NULL, wq);
 }
 
 /*
- * This function unregister poll callbacks from the associated file descriptor.
- * Since this must be called without holding "ep->lock" the atomic exchange trick
- * will protect us from multiple unregister.
+ * This function unregisters poll callbacks from the associated file
+ * descriptor. Must be called with "mtx" held (or "epmutex" if called from
+ * ep_free).
  */
 static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
 {
-	int nwait;
 	struct list_head *lsthead = &epi->pwqlist;
 	struct eppoll_entry *pwq;
 
-	/* This is called without locks, so we need the atomic exchange */
-	nwait = xchg(&epi->nwait, 0);
+	while (!list_empty(lsthead)) {
+		pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
 
-	if (nwait) {
-		while (!list_empty(lsthead)) {
-			pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
-
-			list_del_init(&pwq->llink);
-			remove_wait_queue(pwq->whead, &pwq->wait);
-			kmem_cache_free(pwq_cache, pwq);
-		}
+		list_del(&pwq->llink);
+		remove_wait_queue(pwq->whead, &pwq->wait);
+		kmem_cache_free(pwq_cache, pwq);
 	}
 }
 
+/**
+ * ep_scan_ready_list - Scans the ready list in a way that makes possible for
+ *                      the scan code, to call f_op->poll(). Also allows for
+ *                      O(NumReady) performance.
+ *
+ * @ep: Pointer to the epoll private data structure.
+ * @sproc: Pointer to the scan callback.
+ * @priv: Private opaque data passed to the @sproc callback.
+ *
+ * Returns: The same integer error code returned by the @sproc callback.
+ */
+static int ep_scan_ready_list(struct eventpoll *ep,
+			      int (*sproc)(struct eventpoll *,
+					   struct list_head *, void *),
+			      void *priv)
+{
+	int error, pwake = 0;
+	unsigned long flags;
+	struct epitem *epi, *nepi;
+	LIST_HEAD(txlist);
+
+	/*
+	 * We need to lock this because we could be hit by
+	 * eventpoll_release_file() and epoll_ctl().
+	 */
+	mutex_lock(&ep->mtx);
+
+	/*
+	 * Steal the ready list, and re-init the original one to the
+	 * empty list. Also, set ep->ovflist to NULL so that events
+	 * happening while looping w/out locks, are not lost. We cannot
+	 * have the poll callback to queue directly on ep->rdllist,
+	 * because we want the "sproc" callback to be able to do it
+	 * in a lockless way.
+	 */
+	spin_lock_irqsave(&ep->lock, flags);
+	list_splice_init(&ep->rdllist, &txlist);
+	ep->ovflist = NULL;
+	spin_unlock_irqrestore(&ep->lock, flags);
+
+	/*
+	 * Now call the callback function.
+	 */
+	error = (*sproc)(ep, &txlist, priv);
+
+	spin_lock_irqsave(&ep->lock, flags);
+	/*
+	 * During the time we spent inside the "sproc" callback, some
+	 * other events might have been queued by the poll callback.
+	 * We re-insert them inside the main ready-list here.
+	 */
+	for (nepi = ep->ovflist; (epi = nepi) != NULL;
+	     nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
+		/*
+		 * We need to check if the item is already in the list.
+		 * During the "sproc" callback execution time, items are
+		 * queued into ->ovflist but the "txlist" might already
+		 * contain them, and the list_splice() below takes care of them.
+		 */
+		if (!ep_is_linked(&epi->rdllink))
+			list_add_tail(&epi->rdllink, &ep->rdllist);
+	}
+	/*
+	 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
+	 * releasing the lock, events will be queued in the normal way inside
+	 * ep->rdllist.
+	 */
+	ep->ovflist = EP_UNACTIVE_PTR;
+
+	/*
+	 * Quickly re-inject items left on "txlist".
+	 */
+	list_splice(&txlist, &ep->rdllist);
+
+	if (!list_empty(&ep->rdllist)) {
+		/*
+		 * Wake up (if active) both the eventpoll wait list and
+		 * the ->poll() wait list (delayed after we release the lock).
+		 */
+		if (waitqueue_active(&ep->wq))
+			wake_up_locked(&ep->wq);
+		if (waitqueue_active(&ep->poll_wait))
+			pwake++;
+	}
+	spin_unlock_irqrestore(&ep->lock, flags);
+
+	mutex_unlock(&ep->mtx);
+
+	/* We have to call this outside the lock */
+	if (pwake)
+		ep_poll_safewake(&ep->poll_wait);
+
+	return error;
+}
+
 /*
@@ -434,9 +563,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
 
 	atomic_dec(&ep->user->epoll_watches);
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_remove(%p, %p)\n",
-		     current, ep, file));
-
 	return 0;
 }
 
@@ -447,7 +573,7 @@ static void ep_free(struct eventpoll *ep)
 
 	/* We need to release all tasks waiting for these file */
 	if (waitqueue_active(&ep->poll_wait))
-		ep_poll_safewake(&psw, &ep->poll_wait);
+		ep_poll_safewake(&ep->poll_wait);
 
 	/*
 	 * We need to lock this because we could be hit by
@@ -492,26 +618,54 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file)
 	if (ep)
 		ep_free(ep);
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: close() ep=%p\n", current, ep));
 	return 0;
 }
 
+static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
+			       void *priv)
+{
+	struct epitem *epi, *tmp;
+
+	list_for_each_entry_safe(epi, tmp, head, rdllink) {
+		if (epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
+		    epi->event.events)
+			return POLLIN | POLLRDNORM;
+		else {
+			/*
+			 * Item has been dropped into the ready list by the poll
+			 * callback, but it's not actually ready, as far as
+			 * caller requested events goes. We can remove it here.
+			 */
+			list_del_init(&epi->rdllink);
+		}
+	}
+
+	return 0;
+}
+
+static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
+{
+	return ep_scan_ready_list(priv, ep_read_events_proc, NULL);
+}
+
 static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
 {
-	unsigned int pollflags = 0;
-	unsigned long flags;
+	int pollflags;
 	struct eventpoll *ep = file->private_data;
 
 	/* Insert inside our poll wait queue */
 	poll_wait(file, &ep->poll_wait, wait);
 
-	/* Check our condition */
-	spin_lock_irqsave(&ep->lock, flags);
-	if (!list_empty(&ep->rdllist))
-		pollflags = POLLIN | POLLRDNORM;
-	spin_unlock_irqrestore(&ep->lock, flags);
+	/*
+	 * Proceed to find out if wanted events are really available inside
+	 * the ready list. This need to be done under ep_call_nested()
+	 * supervision, since the call to f_op->poll() done on listed files
+	 * could re-enter here.
+	 */
+	pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
+				   ep_poll_readyevents_proc, ep, ep);
 
-	return pollflags;
+	return pollflags != -1 ? pollflags : 0;
 }
 
 /* File callbacks that implement the eventpoll file behaviour */
@@ -541,7 +695,7 @@ void eventpoll_release_file(struct file *file)
 	 * We don't want to get "file->f_lock" because it is not
 	 * necessary. It is not necessary because we're in the "struct file"
 	 * cleanup path, and this means that noone is using this file anymore.
-	 * So, for example, epoll_ctl() cannot hit here sicne if we reach this
+	 * So, for example, epoll_ctl() cannot hit here since if we reach this
 	 * point, the file counter already went to zero and fget() would fail.
 	 * The only hit might come from ep_free() but by holding the mutex
 	 * will correctly serialize the operation. We do need to acquire
@@ -588,8 +742,6 @@ static int ep_alloc(struct eventpoll **pep)
 
 	*pep = ep;
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
-		     current, ep));
 	return 0;
 
 free_uid:
@@ -623,9 +775,6 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
 		}
 	}
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_find(%p) -> %p\n",
-		     current, file, epir));
-
 	return epir;
 }
 
@@ -641,9 +790,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	struct epitem *epi = ep_item_from_wait(wait);
 	struct eventpoll *ep = epi->ep;
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
-		     current, epi->ffd.file, epi, ep));
-
 	spin_lock_irqsave(&ep->lock, flags);
 
 	/*
@@ -656,6 +802,15 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 		goto out_unlock;
 
 	/*
+	 * Check the events coming with the callback. At this stage, not
+	 * every device reports the events in the "key" parameter of the
+	 * callback. We need to be able to handle both cases here, hence the
+	 * test for "key" != NULL before the event match test.
+	 */
+	if (key && !((unsigned long) key & epi->event.events))
+		goto out_unlock;
+
+	/*
 	 * If we are trasfering events to userspace, we can hold no locks
 	 * (because we're accessing user memory, and because of linux f_op->poll()
 	 * semantics). All the events that happens during that period of time are
@@ -670,12 +825,9 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 	}
 
 	/* If this file is already in the ready list we exit soon */
-	if (ep_is_linked(&epi->rdllink))
-		goto is_linked;
-
-	list_add_tail(&epi->rdllink, &ep->rdllist);
+	if (!ep_is_linked(&epi->rdllink))
+		list_add_tail(&epi->rdllink, &ep->rdllist);
 
-is_linked:
 	/*
 	 * Wake up ( if active ) both the eventpoll wait list and the ->poll()
 	 * wait list.
@@ -690,7 +842,7 @@ out_unlock:
 
 	/* We have to call this outside the lock */
 	if (pwake)
-		ep_poll_safewake(&psw, &ep->poll_wait);
+		ep_poll_safewake(&ep->poll_wait);
 
 	return 1;
 }
@@ -817,10 +969,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 	/* We have to call this outside the lock */
 	if (pwake)
-		ep_poll_safewake(&psw, &ep->poll_wait);
-
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_insert(%p, %p, %d)\n",
-		     current, ep, tfile, fd));
+		ep_poll_safewake(&ep->poll_wait);
 
 	return 0;
 
@@ -851,15 +1000,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 {
 	int pwake = 0;
 	unsigned int revents;
-	unsigned long flags;
 
 	/*
-	 * Set the new event interest mask before calling f_op->poll(), otherwise
-	 * a potential race might occur. In fact if we do this operation inside
-	 * the lock, an event might happen between the f_op->poll() call and the
-	 * new event set registering.
+	 * Set the new event interest mask before calling f_op->poll();
+	 * otherwise we might miss an event that happens between the
+	 * f_op->poll() call and the new event set registering.
 	 */
 	epi->event.events = event->events;
+	epi->event.data = event->data; /* protected by mtx */
 
 	/*
 	 * Get current event bits. We can safely use the file* here because
@@ -867,16 +1015,12 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	 */
 	revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
 
-	spin_lock_irqsave(&ep->lock, flags);
-
-	/* Copy the data member from inside the lock */
-	epi->event.data = event->data;
-
 	/*
 	 * If the item is "hot" and it is not registered inside the ready
 	 * list, push it inside.
 	 */
 	if (revents & event->events) {
+		spin_lock_irq(&ep->lock);
 		if (!ep_is_linked(&epi->rdllink)) {
 			list_add_tail(&epi->rdllink, &ep->rdllist);
 
@@ -886,142 +1030,84 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 			if (waitqueue_active(&ep->poll_wait))
 				pwake++;
 		}
+		spin_unlock_irq(&ep->lock);
 	}
-	spin_unlock_irqrestore(&ep->lock, flags);
 
 	/* We have to call this outside the lock */
 	if (pwake)
-		ep_poll_safewake(&psw, &ep->poll_wait);
+		ep_poll_safewake(&ep->poll_wait);
 
 	return 0;
 }
 
-static int ep_send_events(struct eventpoll *ep, struct epoll_event __user *events,
-			  int maxevents)
+static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
+			       void *priv)
 {
-	int eventcnt, error = -EFAULT, pwake = 0;
+	struct ep_send_events_data *esed = priv;
+	int eventcnt;
 	unsigned int revents;
-	unsigned long flags;
-	struct epitem *epi, *nepi;
-	struct list_head txlist;
-
-	INIT_LIST_HEAD(&txlist);
-
-	/*
-	 * We need to lock this because we could be hit by
-	 * eventpoll_release_file() and epoll_ctl(EPOLL_CTL_DEL).
-	 */
-	mutex_lock(&ep->mtx);
-
-	/*
-	 * Steal the ready list, and re-init the original one to the
-	 * empty list. Also, set ep->ovflist to NULL so that events
-	 * happening while looping w/out locks, are not lost. We cannot
-	 * have the poll callback to queue directly on ep->rdllist,
-	 * because we are doing it in the loop below, in a lockless way.
-	 */
-	spin_lock_irqsave(&ep->lock, flags);
-	list_splice(&ep->rdllist, &txlist);
-	INIT_LIST_HEAD(&ep->rdllist);
-	ep->ovflist = NULL;
-	spin_unlock_irqrestore(&ep->lock, flags);
+	struct epitem *epi;
+	struct epoll_event __user *uevent;
 
 	/*
-	 * We can loop without lock because this is a task private list.
-	 * We just splice'd out the ep->rdllist in ep_collect_ready_items().
-	 * Items cannot vanish during the loop because we are holding "mtx".
+	 * We can loop without lock because we are passed a task private list.
+	 * Items cannot vanish during the loop because ep_scan_ready_list() is
+	 * holding "mtx" during this call.
 	 */
-	for (eventcnt = 0; !list_empty(&txlist) && eventcnt < maxevents;) {
-		epi = list_first_entry(&txlist, struct epitem, rdllink);
+	for (eventcnt = 0, uevent = esed->events;
+	     !list_empty(head) && eventcnt < esed->maxevents;) {
+		epi = list_first_entry(head, struct epitem, rdllink);
 
 		list_del_init(&epi->rdllink);
 
-		/*
-		 * Get the ready file event set. We can safely use the file
-		 * because we are holding the "mtx" and this will guarantee
-		 * that both the file and the item will not vanish.
-		 */
-		revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL);
-		revents &= epi->event.events;
+		revents = epi->ffd.file->f_op->poll(epi->ffd.file, NULL) &
+			epi->event.events;
 
 		/*
-		 * Is the event mask intersect the caller-requested one,
-		 * deliver the event to userspace. Again, we are holding
-		 * "mtx", so no operations coming from userspace can change
-		 * the item.
+		 * If the event mask intersect the caller-requested one,
+		 * deliver the event to userspace. Again, ep_scan_ready_list()
+		 * is holding "mtx", so no operations coming from userspace
+		 * can change the item.
 		 */
 		if (revents) {
-			if (__put_user(revents,
-					&events[eventcnt].events) ||
-			    __put_user(epi->event.data,
-					&events[eventcnt].data))
-				goto errxit;
+			if (__put_user(revents, &uevent->events) ||
+			    __put_user(epi->event.data, &uevent->data)) {
+				list_add(&epi->rdllink, head);
+				return eventcnt ? eventcnt : -EFAULT;
+			}
+			eventcnt++;
+			uevent++;
 			if (epi->event.events & EPOLLONESHOT)
 				epi->event.events &= EP_PRIVATE_BITS;
-			eventcnt++;
+			else if (!(epi->event.events & EPOLLET)) {
+				/*
+				 * If this file has been added with Level
+				 * Trigger mode, we need to insert back inside
+				 * the ready list, so that the next call to
+				 * epoll_wait() will check again the events
+				 * availability. At this point, noone can insert
+				 * into ep->rdllist besides us. The epoll_ctl()
+				 * callers are locked out by
+				 * ep_scan_ready_list() holding "mtx" and the
+				 * poll callback will queue them in ep->ovflist.
+				 */
+				list_add_tail(&epi->rdllink, &ep->rdllist);
+			}
 		}
-		/*
-		 * At this point, noone can insert into ep->rdllist besides
-		 * us. The epoll_ctl() callers are locked out by us holding
-		 * "mtx" and the poll callback will queue them in ep->ovflist.
-		 */
-		if (!(epi->event.events & EPOLLET) &&
-		    (revents & epi->event.events))
-			list_add_tail(&epi->rdllink, &ep->rdllist);
 	}
-	error = 0;
-
-errxit:
-
-	spin_lock_irqsave(&ep->lock, flags);
-	/*
-	 * During the time we spent in the loop above, some other events
-	 * might have been queued by the poll callback. We re-insert them
-	 * inside the main ready-list here.
-	 */
-	for (nepi = ep->ovflist; (epi = nepi) != NULL;
-	     nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
-		/*
-		 * If the above loop quit with errors, the epoll item might still
-		 * be linked to "txlist", and the list_splice() done below will
-		 * take care of those cases.
-		 */
-		if (!ep_is_linked(&epi->rdllink))
-			list_add_tail(&epi->rdllink, &ep->rdllist);
-	}
-	/*
-	 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
-	 * releasing the lock, events will be queued in the normal way inside
-	 * ep->rdllist.
-	 */
-	ep->ovflist = EP_UNACTIVE_PTR;
 
-	/*
-	 * In case of error in the event-send loop, or in case the number of
-	 * ready events exceeds the userspace limit, we need to splice the
-	 * "txlist" back inside ep->rdllist.
-	 */
-	list_splice(&txlist, &ep->rdllist);
-
-	if (!list_empty(&ep->rdllist)) {
-		/*
-		 * Wake up (if active) both the eventpoll wait list and the ->poll()
-		 * wait list (delayed after we release the lock).
-		 */
-		if (waitqueue_active(&ep->wq))
-			wake_up_locked(&ep->wq);
-		if (waitqueue_active(&ep->poll_wait))
-			pwake++;
-	}
-	spin_unlock_irqrestore(&ep->lock, flags);
+	return eventcnt;
+}
 
-	mutex_unlock(&ep->mtx);
+static int ep_send_events(struct eventpoll *ep,
+			  struct epoll_event __user *events, int maxevents)
+{
+	struct ep_send_events_data esed;
 
-	/* We have to call this outside the lock */
-	if (pwake)
-		ep_poll_safewake(&psw, &ep->poll_wait);
+	esed.maxevents = maxevents;
+	esed.events = events;
 
-	return eventcnt == 0 ? error: eventcnt;
+	return ep_scan_ready_list(ep, ep_send_events_proc, &esed);
 }
 
 static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
@@ -1033,7 +1119,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 	wait_queue_t wait;
 
 	/*
-	 * Calculate the timeout by checking for the "infinite" value ( -1 )
+	 * Calculate the timeout by checking for the "infinite" value (-1)
 	 * and the overflow condition. The passed timeout is in milliseconds,
 	 * that why (t * HZ) / 1000.
 	 */
@@ -1076,9 +1162,8 @@ retry:
 
 		set_current_state(TASK_RUNNING);
 	}
-
 	/* Is it worth to try to dig for events ? */
-	eavail = !list_empty(&ep->rdllist);
+	eavail = !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR;
 
 	spin_unlock_irqrestore(&ep->lock, flags);
 
@@ -1099,41 +1184,30 @@ retry:
  */
 SYSCALL_DEFINE1(epoll_create1, int, flags)
 {
-	int error, fd = -1;
-	struct eventpoll *ep;
+	int error;
+	struct eventpoll *ep = NULL;
 
 	/* Check the EPOLL_* constant for consistency. */
 	BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
 
 	if (flags & ~EPOLL_CLOEXEC)
 		return -EINVAL;
-
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
-		     current, flags));
-
 	/*
-	 * Create the internal data structure ( "struct eventpoll" ).
+	 * Create the internal data structure ("struct eventpoll").
 	 */
 	error = ep_alloc(&ep);
-	if (error < 0) {
-		fd = error;
-		goto error_return;
-	}
-
+	if (error < 0)
+		return error;
 	/*
 	 * Creates all the items needed to setup an eventpoll file. That is,
 	 * a file structure and a free file descriptor.
 	 */
-	fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
-			      flags & O_CLOEXEC);
-	if (fd < 0)
-		ep_free(ep);
-
-error_return:
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
-		     current, flags, fd));
+	error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+				 flags & O_CLOEXEC);
+	if (error < 0)
+		ep_free(ep);
 
-	return fd;
+	return error;
 }
 
 SYSCALL_DEFINE1(epoll_create, int, size)
@@ -1158,9 +1232,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
 	struct epitem *epi;
 	struct epoll_event epds;
 
-	DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p)\n",
-		     current, epfd, op, fd, event));
-
 	error = -EFAULT;
 	if (ep_op_has_event(op) &&
 	    copy_from_user(&epds, event, sizeof(struct epoll_event)))
@@ -1211,7 +1282,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
1211 case EPOLL_CTL_ADD: 1282 case EPOLL_CTL_ADD:
1212 if (!epi) { 1283 if (!epi) {
1213 epds.events |= POLLERR | POLLHUP; 1284 epds.events |= POLLERR | POLLHUP;
1214
1215 error = ep_insert(ep, &epds, tfile, fd); 1285 error = ep_insert(ep, &epds, tfile, fd);
1216 } else 1286 } else
1217 error = -EEXIST; 1287 error = -EEXIST;
@@ -1237,8 +1307,6 @@ error_tgt_fput:
1237error_fput: 1307error_fput:
1238 fput(file); 1308 fput(file);
1239error_return: 1309error_return:
1240 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_ctl(%d, %d, %d, %p) = %d\n",
1241 current, epfd, op, fd, event, error));
1242 1310
1243 return error; 1311 return error;
1244} 1312}
@@ -1254,9 +1322,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1254 struct file *file; 1322 struct file *file;
1255 struct eventpoll *ep; 1323 struct eventpoll *ep;
1256 1324
1257 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d)\n",
1258 current, epfd, events, maxevents, timeout));
1259
1260 /* The maximum number of events must be greater than zero */ 1325
1261 if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) 1326 if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
1262 return -EINVAL; 1327 return -EINVAL;
@@ -1293,8 +1358,6 @@ SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
1293error_fput: 1358error_fput:
1294 fput(file); 1359 fput(file);
1295error_return: 1360error_return:
1296 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_wait(%d, %p, %d, %d) = %d\n",
1297 current, epfd, events, maxevents, timeout, error));
1298 1361
1299 return error; 1362 return error;
1300} 1363}
@@ -1359,17 +1422,18 @@ static int __init eventpoll_init(void)
1359 EP_ITEM_COST; 1422 EP_ITEM_COST;
1360 1423
1361 /* Initialize the structure used to perform safe poll wait head wake ups */ 1424 /* Initialize the structure used to perform safe poll wait head wake ups */
1362 ep_poll_safewake_init(&psw); 1425 ep_nested_calls_init(&poll_safewake_ncalls);
1426
1427 /* Initialize the structure used to perform file's f_op->poll() calls */
1428 ep_nested_calls_init(&poll_readywalk_ncalls);
1363 1429
1364 /* Allocates slab cache used to allocate "struct epitem" items */ 1430 /* Allocates slab cache used to allocate "struct epitem" items */
1365 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem), 1431 epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
1366 0, SLAB_HWCACHE_ALIGN|EPI_SLAB_DEBUG|SLAB_PANIC, 1432 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
1367 NULL);
1368 1433
1369 /* Allocates slab cache used to allocate "struct eppoll_entry" */ 1434 /* Allocates slab cache used to allocate "struct eppoll_entry" */
1370 pwq_cache = kmem_cache_create("eventpoll_pwq", 1435 pwq_cache = kmem_cache_create("eventpoll_pwq",
1371 sizeof(struct eppoll_entry), 0, 1436 sizeof(struct eppoll_entry), 0, SLAB_PANIC, NULL);
1372 EPI_SLAB_DEBUG|SLAB_PANIC, NULL);
1373 1437
1374 return 0; 1438 return 0;
1375} 1439}
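
The epoll_create1() path above shrinks because anon_inode_getfd() both installs the eventpoll file and returns the new descriptor, so the syscall only has to allocate the context and hand back whatever anon_inode_getfd() returns, freeing the context on failure. Userspace is unaffected; a minimal sketch of the (unchanged) calling convention, watching stdin:

#include <stdio.h>
#include <sys/epoll.h>
#include <unistd.h>

int main(void)
{
        /* epoll_create1() takes flags instead of the legacy, ignored
         * "size" hint; EPOLL_CLOEXEC maps straight onto O_CLOEXEC. */
        int epfd = epoll_create1(EPOLL_CLOEXEC);
        if (epfd < 0) {
                perror("epoll_create1");
                return 1;
        }
        struct epoll_event ev = { .events = EPOLLIN, .data.fd = STDIN_FILENO };
        if (epoll_ctl(epfd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) < 0) {
                perror("epoll_ctl");
                return 1;
        }
        close(epfd);
        return 0;
}
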
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6083bb38057b..990c94000924 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1098,7 +1098,7 @@ extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
1098extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 1098extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1099extern int ext4_block_truncate_page(handle_t *handle, 1099extern int ext4_block_truncate_page(handle_t *handle,
1100 struct address_space *mapping, loff_t from); 1100 struct address_space *mapping, loff_t from);
1101extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); 1101extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1102extern qsize_t ext4_get_reserved_space(struct inode *inode); 1102extern qsize_t ext4_get_reserved_space(struct inode *inode);
1103 1103
1104/* ioctl.c */ 1104/* ioctl.c */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 71d3ecd5db79..dd82ff390067 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5146,8 +5146,9 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
5146 return !buffer_mapped(bh); 5146 return !buffer_mapped(bh);
5147} 5147}
5148 5148
5149int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) 5149int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5150{ 5150{
5151 struct page *page = vmf->page;
5151 loff_t size; 5152 loff_t size;
5152 unsigned long len; 5153 unsigned long len;
5153 int ret = -EINVAL; 5154 int ret = -EINVAL;
@@ -5199,6 +5200,8 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page)
5199 goto out_unlock; 5200 goto out_unlock;
5200 ret = 0; 5201 ret = 0;
5201out_unlock: 5202out_unlock:
5203 if (ret)
5204 ret = VM_FAULT_SIGBUS;
5202 up_read(&inode->i_alloc_sem); 5205 up_read(&inode->i_alloc_sem);
5203 return ret; 5206 return ret;
5204} 5207}
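
The hunk above is one instance of a tree-wide conversion (fuse, gfs2, nfs, and ocfs2 follow below): ->page_mkwrite now receives a struct vm_fault * instead of the bare page, and must return a VM_FAULT_* code rather than an errno, hence the "if (ret) ret = VM_FAULT_SIGBUS;" tail added to each handler. A minimal sketch of a handler under the new convention -- example_page_mkwrite and its truncate check are illustrative, not lifted from any of these filesystems:

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

static int example_page_mkwrite(struct vm_area_struct *vma,
                                struct vm_fault *vmf)
{
        struct page *page = vmf->page;  /* previously the 2nd argument */
        struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
        int err = 0;

        lock_page(page);
        if (page->mapping != inode->i_mapping)  /* raced with truncate */
                err = -EINVAL;
        /* ... allocate/reserve backing store for the page here ... */
        unlock_page(page);

        /* The fault path now expects a VM_FAULT_* code, not an errno. */
        return err ? VM_FAULT_SIGBUS : 0;
}
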
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 821d10f719bd..4e340fedf768 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1234,8 +1234,9 @@ static void fuse_vma_close(struct vm_area_struct *vma)
1234 * - sync(2) 1234 * - sync(2)
1235 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER 1235 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
1236 */ 1236 */
1237static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page) 1237static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1238{ 1238{
1239 struct page *page = vmf->page;
1239 /* 1240 /*
1240 * Don't use page->mapping as it may become NULL from a 1241 * Don't use page->mapping as it may become NULL from a
1241 * concurrent truncate. 1242 * concurrent truncate.
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 3b9e8de3500b..70b9b8548945 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -337,8 +337,9 @@ static int gfs2_allocate_page_backing(struct page *page)
337 * blocks allocated on disk to back that page. 337 * blocks allocated on disk to back that page.
338 */ 338 */
339 339
340static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) 340static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
341{ 341{
342 struct page *page = vmf->page;
342 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 343 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
343 struct gfs2_inode *ip = GFS2_I(inode); 344 struct gfs2_inode *ip = GFS2_I(inode);
344 struct gfs2_sbd *sdp = GFS2_SB(inode); 345 struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -412,6 +413,8 @@ out_unlock:
412 gfs2_glock_dq(&gh); 413 gfs2_glock_dq(&gh);
413out: 414out:
414 gfs2_holder_uninit(&gh); 415 gfs2_holder_uninit(&gh);
416 if (ret)
417 ret = VM_FAULT_SIGBUS;
415 return ret; 418 return ret;
416} 419}
417 420
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9b800d97a687..23a3c76711e0 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -943,14 +943,13 @@ static struct vfsmount *hugetlbfs_vfsmount;
943 943
944static int can_do_hugetlb_shm(void) 944static int can_do_hugetlb_shm(void)
945{ 945{
946 return likely(capable(CAP_IPC_LOCK) || 946 return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group);
947 in_group_p(sysctl_hugetlb_shm_group) ||
948 can_do_mlock());
949} 947}
950 948
951struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) 949struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag)
952{ 950{
953 int error = -ENOMEM; 951 int error = -ENOMEM;
952 int unlock_shm = 0;
954 struct file *file; 953 struct file *file;
955 struct inode *inode; 954 struct inode *inode;
956 struct dentry *dentry, *root; 955 struct dentry *dentry, *root;
@@ -960,11 +959,14 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag)
960 if (!hugetlbfs_vfsmount) 959 if (!hugetlbfs_vfsmount)
961 return ERR_PTR(-ENOENT); 960 return ERR_PTR(-ENOENT);
962 961
963 if (!can_do_hugetlb_shm()) 962 if (!can_do_hugetlb_shm()) {
964 return ERR_PTR(-EPERM); 963 if (user_shm_lock(size, user)) {
965 964 unlock_shm = 1;
966 if (!user_shm_lock(size, user)) 965 WARN_ONCE(1,
967 return ERR_PTR(-ENOMEM); 966 "Using mlock ulimits for SHM_HUGETLB deprecated\n");
967 } else
968 return ERR_PTR(-EPERM);
969 }
968 970
969 root = hugetlbfs_vfsmount->mnt_root; 971 root = hugetlbfs_vfsmount->mnt_root;
970 quick_string.name = name; 972 quick_string.name = name;
@@ -1004,7 +1006,8 @@ out_inode:
1004out_dentry: 1006out_dentry:
1005 dput(dentry); 1007 dput(dentry);
1006out_shm_unlock: 1008out_shm_unlock:
1007 user_shm_unlock(size, user); 1009 if (unlock_shm)
1010 user_shm_unlock(size, user);
1008 return ERR_PTR(error); 1011 return ERR_PTR(error);
1009} 1012}
1010 1013
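
hugetlb_file_setup() used to refuse SHM_HUGETLB outright for callers lacking CAP_IPC_LOCK and hugetlb_shm_group membership (the can_do_mlock() escape hatch is dropped from can_do_hugetlb_shm() above); such callers now fall back to mlock rlimit accounting via user_shm_lock(), with a one-time deprecation warning, and the error path only calls user_shm_unlock() when that fallback was taken. The gate is exercised by an ordinary SHM_HUGETLB segment; a userspace sketch, assuming a configured huge page pool and 2 MiB huge pages:

#include <stdio.h>
#include <sys/ipc.h>
#include <sys/shm.h>

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000       /* Linux value; absent from older headers */
#endif

#define LENGTH (2UL * 1024 * 1024)      /* one 2 MiB huge page (assumed) */

int main(void)
{
        /* Permission paths checked in hugetlb_file_setup(): CAP_IPC_LOCK,
         * hugetlb_shm_group membership, or (after this patch) headroom
         * under RLIMIT_MEMLOCK. */
        int id = shmget(IPC_PRIVATE, LENGTH, IPC_CREAT | SHM_HUGETLB | 0600);
        if (id < 0) {
                perror("shmget(SHM_HUGETLB)");
                return 1;
        }
        void *p = shmat(id, NULL, 0);
        if (p == (void *)-1) {
                perror("shmat");
                return 1;
        }
        shmdt(p);
        shmctl(id, IPC_RMID, NULL);
        return 0;
}
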
diff --git a/fs/jfs/jfs_debug.c b/fs/jfs/jfs_debug.c
index 6a73de84bcef..dd824d9b0b1a 100644
--- a/fs/jfs/jfs_debug.c
+++ b/fs/jfs/jfs_debug.c
@@ -90,7 +90,6 @@ void jfs_proc_init(void)
90 90
91 if (!(base = proc_mkdir("fs/jfs", NULL))) 91 if (!(base = proc_mkdir("fs/jfs", NULL)))
92 return; 92 return;
93 base->owner = THIS_MODULE;
94 93
95 for (i = 0; i < NPROCENT; i++) 94 for (i = 0; i < NPROCENT; i++)
96 proc_create(Entries[i].name, 0, base, Entries[i].proc_fops); 95 proc_create(Entries[i].name, 0, base, Entries[i].proc_fops);
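
Another recurring pattern in this series: the proc_dir_entry->owner assignments vanish (here, and in the afs and nfs hunks) because fs/proc now guards ->proc_fops calls with the pde_users counting shown in the fs/proc/generic.c hunks below, instead of pinning the owning module for the entry's lifetime. Registration reduces to the bare create calls; a sketch, where the jfs_proc_register() wrapper is illustrative rather than kernel code:

#include <linux/module.h>
#include <linux/proc_fs.h>

static struct proc_dir_entry *base;

static void jfs_proc_register(void)
{
        base = proc_mkdir("fs/jfs", NULL);
        if (!base)
                return;
        /* Previously: base->owner = THIS_MODULE;
         * The pde_users accounting makes that pinning unnecessary. */
}
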
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 574158ae2398..2277421656e7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1606,8 +1606,6 @@ int __init nfs_fs_proc_init(void)
1606 if (!proc_fs_nfs) 1606 if (!proc_fs_nfs)
1607 goto error_0; 1607 goto error_0;
1608 1608
1609 proc_fs_nfs->owner = THIS_MODULE;
1610
1611 /* a file of servers with which we're dealing */ 1609 /* a file of servers with which we're dealing */
1612 p = proc_create("servers", S_IFREG|S_IRUGO, 1610 p = proc_create("servers", S_IFREG|S_IRUGO,
1613 proc_fs_nfs, &nfs_server_list_fops); 1611 proc_fs_nfs, &nfs_server_list_fops);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 90f292b520d2..cec79392e4ba 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -451,8 +451,9 @@ const struct address_space_operations nfs_file_aops = {
451 .launder_page = nfs_launder_page, 451 .launder_page = nfs_launder_page,
452}; 452};
453 453
454static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) 454static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
455{ 455{
456 struct page *page = vmf->page;
456 struct file *filp = vma->vm_file; 457 struct file *filp = vma->vm_file;
457 struct dentry *dentry = filp->f_path.dentry; 458 struct dentry *dentry = filp->f_path.dentry;
458 unsigned pagelen; 459 unsigned pagelen;
@@ -483,6 +484,8 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
483 ret = pagelen; 484 ret = pagelen;
484out_unlock: 485out_unlock:
485 unlock_page(page); 486 unlock_page(page);
487 if (ret)
488 ret = VM_FAULT_SIGBUS;
486 return ret; 489 return ret;
487} 490}
488 491
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 34314b33dbd4..5a9e34475e37 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -32,8 +32,8 @@
32/** 32/**
33 * The little endian Unicode string $I30 as a global constant. 33 * The little endian Unicode string $I30 as a global constant.
34 */ 34 */
35ntfschar I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'), 35ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
36 const_cpu_to_le16('3'), const_cpu_to_le16('0'), 0 }; 36 cpu_to_le16('3'), cpu_to_le16('0'), 0 };
37 37
38/** 38/**
39 * ntfs_lookup_inode_by_name - find an inode in a directory given its name 39 * ntfs_lookup_inode_by_name - find an inode in a directory given its name
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 86bef156cf0a..82c5085559c6 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1975,8 +1975,7 @@ int ntfs_read_inode_mount(struct inode *vi)
1975 goto em_put_err_out; 1975 goto em_put_err_out;
1976 next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + 1976 next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
1977 le16_to_cpu(al_entry->length)); 1977 le16_to_cpu(al_entry->length));
1978 if (le32_to_cpu(al_entry->type) > 1978 if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
1979 const_le32_to_cpu(AT_DATA))
1980 goto em_put_err_out; 1979 goto em_put_err_out;
1981 if (AT_DATA != al_entry->type) 1980 if (AT_DATA != al_entry->type)
1982 continue; 1981 continue;
diff --git a/fs/ntfs/layout.h b/fs/ntfs/layout.h
index 1e383328eceb..50931b1ce4b9 100644
--- a/fs/ntfs/layout.h
+++ b/fs/ntfs/layout.h
@@ -31,19 +31,8 @@
31 31
32#include "types.h" 32#include "types.h"
33 33
34/*
35 * Constant endianness conversion defines.
36 */
37#define const_le16_to_cpu(x) __constant_le16_to_cpu(x)
38#define const_le32_to_cpu(x) __constant_le32_to_cpu(x)
39#define const_le64_to_cpu(x) __constant_le64_to_cpu(x)
40
41#define const_cpu_to_le16(x) __constant_cpu_to_le16(x)
42#define const_cpu_to_le32(x) __constant_cpu_to_le32(x)
43#define const_cpu_to_le64(x) __constant_cpu_to_le64(x)
44
45/* The NTFS oem_id "NTFS " */ 34/* The NTFS oem_id "NTFS " */
46#define magicNTFS const_cpu_to_le64(0x202020205346544eULL) 35#define magicNTFS cpu_to_le64(0x202020205346544eULL)
47 36
48/* 37/*
49 * Location of bootsector on partition: 38 * Location of bootsector on partition:
@@ -114,25 +103,25 @@ typedef struct {
114 */ 103 */
115enum { 104enum {
116 /* Found in $MFT/$DATA. */ 105 /* Found in $MFT/$DATA. */
117 magic_FILE = const_cpu_to_le32(0x454c4946), /* Mft entry. */ 106 magic_FILE = cpu_to_le32(0x454c4946), /* Mft entry. */
118 magic_INDX = const_cpu_to_le32(0x58444e49), /* Index buffer. */ 107 magic_INDX = cpu_to_le32(0x58444e49), /* Index buffer. */
119 magic_HOLE = const_cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */ 108 magic_HOLE = cpu_to_le32(0x454c4f48), /* ? (NTFS 3.0+?) */
120 109
121 /* Found in $LogFile/$DATA. */ 110 /* Found in $LogFile/$DATA. */
122 magic_RSTR = const_cpu_to_le32(0x52545352), /* Restart page. */ 111 magic_RSTR = cpu_to_le32(0x52545352), /* Restart page. */
123 magic_RCRD = const_cpu_to_le32(0x44524352), /* Log record page. */ 112 magic_RCRD = cpu_to_le32(0x44524352), /* Log record page. */
124 113
125 /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */ 114 /* Found in $LogFile/$DATA. (May be found in $MFT/$DATA, also?) */
126 magic_CHKD = const_cpu_to_le32(0x444b4843), /* Modified by chkdsk. */ 115 magic_CHKD = cpu_to_le32(0x444b4843), /* Modified by chkdsk. */
127 116
128 /* Found in all ntfs record containing records. */ 117 /* Found in all ntfs record containing records. */
129 magic_BAAD = const_cpu_to_le32(0x44414142), /* Failed multi sector 118 magic_BAAD = cpu_to_le32(0x44414142), /* Failed multi sector
130 transfer was detected. */ 119 transfer was detected. */
131 /* 120 /*
132 * Found in $LogFile/$DATA when a page is full of 0xff bytes and is 121 * Found in $LogFile/$DATA when a page is full of 0xff bytes and is
133 * thus not initialized. Page must be initialized before using it. 122 * thus not initialized. Page must be initialized before using it.
134 */ 123 */
135 magic_empty = const_cpu_to_le32(0xffffffff) /* Record is empty. */ 124 magic_empty = cpu_to_le32(0xffffffff) /* Record is empty. */
136}; 125};
137 126
138typedef le32 NTFS_RECORD_TYPE; 127typedef le32 NTFS_RECORD_TYPE;
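
The deleted const_* wrappers date from when the generic cpu_to_le*() helpers could expand to out-of-line byte swaps, making them illegal in constant expressions such as the enum initialisers above; the byteorder headers now select a constant-folding macro whenever the argument is a compile-time constant, so plain cpu_to_le*() works everywhere and the wrappers are dead weight. The underlying trick, in illustrative form (CONST_SWAB16 is a stand-in, not the kernel's macro):

#include <stdint.h>

/* A byte swap written as a pure constant expression, so it remains
 * legal in enum initialisers and case labels on big-endian targets
 * (on little-endian ones cpu_to_le16() is the identity anyway). */
#define CONST_SWAB16(x) ((uint16_t)((((uint16_t)(x) & 0x00ffU) << 8) | \
                                    (((uint16_t)(x) & 0xff00U) >> 8)))

enum {
        EXAMPLE_IN_USE = CONST_SWAB16(0x0001),  /* folds at compile time */
};
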
@@ -258,8 +247,8 @@ typedef enum {
258 * information about the mft record in which they are present. 247 * information about the mft record in which they are present.
259 */ 248 */
260enum { 249enum {
261 MFT_RECORD_IN_USE = const_cpu_to_le16(0x0001), 250 MFT_RECORD_IN_USE = cpu_to_le16(0x0001),
262 MFT_RECORD_IS_DIRECTORY = const_cpu_to_le16(0x0002), 251 MFT_RECORD_IS_DIRECTORY = cpu_to_le16(0x0002),
263} __attribute__ ((__packed__)); 252} __attribute__ ((__packed__));
264 253
265typedef le16 MFT_RECORD_FLAGS; 254typedef le16 MFT_RECORD_FLAGS;
@@ -309,7 +298,7 @@ typedef le16 MFT_RECORD_FLAGS;
309 * Note: The _LE versions will return a CPU endian formatted value! 298 * Note: The _LE versions will return a CPU endian formatted value!
310 */ 299 */
311#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL 300#define MFT_REF_MASK_CPU 0x0000ffffffffffffULL
312#define MFT_REF_MASK_LE const_cpu_to_le64(MFT_REF_MASK_CPU) 301#define MFT_REF_MASK_LE cpu_to_le64(MFT_REF_MASK_CPU)
313 302
314typedef u64 MFT_REF; 303typedef u64 MFT_REF;
315typedef le64 leMFT_REF; 304typedef le64 leMFT_REF;
@@ -477,25 +466,25 @@ typedef struct {
477 * a revealing choice of symbol I do not know what is... (-; 466 * a revealing choice of symbol I do not know what is... (-;
478 */ 467 */
479enum { 468enum {
480 AT_UNUSED = const_cpu_to_le32( 0), 469 AT_UNUSED = cpu_to_le32( 0),
481 AT_STANDARD_INFORMATION = const_cpu_to_le32( 0x10), 470 AT_STANDARD_INFORMATION = cpu_to_le32( 0x10),
482 AT_ATTRIBUTE_LIST = const_cpu_to_le32( 0x20), 471 AT_ATTRIBUTE_LIST = cpu_to_le32( 0x20),
483 AT_FILE_NAME = const_cpu_to_le32( 0x30), 472 AT_FILE_NAME = cpu_to_le32( 0x30),
484 AT_OBJECT_ID = const_cpu_to_le32( 0x40), 473 AT_OBJECT_ID = cpu_to_le32( 0x40),
485 AT_SECURITY_DESCRIPTOR = const_cpu_to_le32( 0x50), 474 AT_SECURITY_DESCRIPTOR = cpu_to_le32( 0x50),
486 AT_VOLUME_NAME = const_cpu_to_le32( 0x60), 475 AT_VOLUME_NAME = cpu_to_le32( 0x60),
487 AT_VOLUME_INFORMATION = const_cpu_to_le32( 0x70), 476 AT_VOLUME_INFORMATION = cpu_to_le32( 0x70),
488 AT_DATA = const_cpu_to_le32( 0x80), 477 AT_DATA = cpu_to_le32( 0x80),
489 AT_INDEX_ROOT = const_cpu_to_le32( 0x90), 478 AT_INDEX_ROOT = cpu_to_le32( 0x90),
490 AT_INDEX_ALLOCATION = const_cpu_to_le32( 0xa0), 479 AT_INDEX_ALLOCATION = cpu_to_le32( 0xa0),
491 AT_BITMAP = const_cpu_to_le32( 0xb0), 480 AT_BITMAP = cpu_to_le32( 0xb0),
492 AT_REPARSE_POINT = const_cpu_to_le32( 0xc0), 481 AT_REPARSE_POINT = cpu_to_le32( 0xc0),
493 AT_EA_INFORMATION = const_cpu_to_le32( 0xd0), 482 AT_EA_INFORMATION = cpu_to_le32( 0xd0),
494 AT_EA = const_cpu_to_le32( 0xe0), 483 AT_EA = cpu_to_le32( 0xe0),
495 AT_PROPERTY_SET = const_cpu_to_le32( 0xf0), 484 AT_PROPERTY_SET = cpu_to_le32( 0xf0),
496 AT_LOGGED_UTILITY_STREAM = const_cpu_to_le32( 0x100), 485 AT_LOGGED_UTILITY_STREAM = cpu_to_le32( 0x100),
497 AT_FIRST_USER_DEFINED_ATTRIBUTE = const_cpu_to_le32( 0x1000), 486 AT_FIRST_USER_DEFINED_ATTRIBUTE = cpu_to_le32( 0x1000),
498 AT_END = const_cpu_to_le32(0xffffffff) 487 AT_END = cpu_to_le32(0xffffffff)
499}; 488};
500 489
501typedef le32 ATTR_TYPE; 490typedef le32 ATTR_TYPE;
@@ -539,13 +528,13 @@ typedef le32 ATTR_TYPE;
539 * equal then the second le32 values would be compared, etc. 528 * equal then the second le32 values would be compared, etc.
540 */ 529 */
541enum { 530enum {
542 COLLATION_BINARY = const_cpu_to_le32(0x00), 531 COLLATION_BINARY = cpu_to_le32(0x00),
543 COLLATION_FILE_NAME = const_cpu_to_le32(0x01), 532 COLLATION_FILE_NAME = cpu_to_le32(0x01),
544 COLLATION_UNICODE_STRING = const_cpu_to_le32(0x02), 533 COLLATION_UNICODE_STRING = cpu_to_le32(0x02),
545 COLLATION_NTOFS_ULONG = const_cpu_to_le32(0x10), 534 COLLATION_NTOFS_ULONG = cpu_to_le32(0x10),
546 COLLATION_NTOFS_SID = const_cpu_to_le32(0x11), 535 COLLATION_NTOFS_SID = cpu_to_le32(0x11),
547 COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(0x12), 536 COLLATION_NTOFS_SECURITY_HASH = cpu_to_le32(0x12),
548 COLLATION_NTOFS_ULONGS = const_cpu_to_le32(0x13), 537 COLLATION_NTOFS_ULONGS = cpu_to_le32(0x13),
549}; 538};
550 539
551typedef le32 COLLATION_RULE; 540typedef le32 COLLATION_RULE;
@@ -559,25 +548,25 @@ typedef le32 COLLATION_RULE;
559 * NT4. 548 * NT4.
560 */ 549 */
561enum { 550enum {
562 ATTR_DEF_INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be 551 ATTR_DEF_INDEXABLE = cpu_to_le32(0x02), /* Attribute can be
563 indexed. */ 552 indexed. */
564 ATTR_DEF_MULTIPLE = const_cpu_to_le32(0x04), /* Attribute type 553 ATTR_DEF_MULTIPLE = cpu_to_le32(0x04), /* Attribute type
565 can be present multiple times in the 554 can be present multiple times in the
566 mft records of an inode. */ 555 mft records of an inode. */
567 ATTR_DEF_NOT_ZERO = const_cpu_to_le32(0x08), /* Attribute value 556 ATTR_DEF_NOT_ZERO = cpu_to_le32(0x08), /* Attribute value
568 must contain at least one non-zero 557 must contain at least one non-zero
569 byte. */ 558 byte. */
570 ATTR_DEF_INDEXED_UNIQUE = const_cpu_to_le32(0x10), /* Attribute must be 559 ATTR_DEF_INDEXED_UNIQUE = cpu_to_le32(0x10), /* Attribute must be
571 indexed and the attribute value must be 560 indexed and the attribute value must be
572 unique for the attribute type in all of 561 unique for the attribute type in all of
573 the mft records of an inode. */ 562 the mft records of an inode. */
574 ATTR_DEF_NAMED_UNIQUE = const_cpu_to_le32(0x20), /* Attribute must be 563 ATTR_DEF_NAMED_UNIQUE = cpu_to_le32(0x20), /* Attribute must be
575 named and the name must be unique for 564 named and the name must be unique for
576 the attribute type in all of the mft 565 the attribute type in all of the mft
577 records of an inode. */ 566 records of an inode. */
578 ATTR_DEF_RESIDENT = const_cpu_to_le32(0x40), /* Attribute must be 567 ATTR_DEF_RESIDENT = cpu_to_le32(0x40), /* Attribute must be
579 resident. */ 568 resident. */
580 ATTR_DEF_ALWAYS_LOG = const_cpu_to_le32(0x80), /* Always log 569 ATTR_DEF_ALWAYS_LOG = cpu_to_le32(0x80), /* Always log
581 modifications to this attribute, 570 modifications to this attribute,
582 regardless of whether it is resident or 571 regardless of whether it is resident or
583 non-resident. Without this, only log 572 non-resident. Without this, only log
@@ -614,12 +603,12 @@ typedef struct {
614 * Attribute flags (16-bit). 603 * Attribute flags (16-bit).
615 */ 604 */
616enum { 605enum {
617 ATTR_IS_COMPRESSED = const_cpu_to_le16(0x0001), 606 ATTR_IS_COMPRESSED = cpu_to_le16(0x0001),
618 ATTR_COMPRESSION_MASK = const_cpu_to_le16(0x00ff), /* Compression method 607 ATTR_COMPRESSION_MASK = cpu_to_le16(0x00ff), /* Compression method
619 mask. Also, first 608 mask. Also, first
620 illegal value. */ 609 illegal value. */
621 ATTR_IS_ENCRYPTED = const_cpu_to_le16(0x4000), 610 ATTR_IS_ENCRYPTED = cpu_to_le16(0x4000),
622 ATTR_IS_SPARSE = const_cpu_to_le16(0x8000), 611 ATTR_IS_SPARSE = cpu_to_le16(0x8000),
623} __attribute__ ((__packed__)); 612} __attribute__ ((__packed__));
624 613
625typedef le16 ATTR_FLAGS; 614typedef le16 ATTR_FLAGS;
@@ -811,32 +800,32 @@ typedef ATTR_RECORD ATTR_REC;
811 * flags appear in all of the above. 800 * flags appear in all of the above.
812 */ 801 */
813enum { 802enum {
814 FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001), 803 FILE_ATTR_READONLY = cpu_to_le32(0x00000001),
815 FILE_ATTR_HIDDEN = const_cpu_to_le32(0x00000002), 804 FILE_ATTR_HIDDEN = cpu_to_le32(0x00000002),
816 FILE_ATTR_SYSTEM = const_cpu_to_le32(0x00000004), 805 FILE_ATTR_SYSTEM = cpu_to_le32(0x00000004),
817 /* Old DOS volid. Unused in NT. = const_cpu_to_le32(0x00000008), */ 806 /* Old DOS volid. Unused in NT. = cpu_to_le32(0x00000008), */
818 807
819 FILE_ATTR_DIRECTORY = const_cpu_to_le32(0x00000010), 808 FILE_ATTR_DIRECTORY = cpu_to_le32(0x00000010),
820 /* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is 809 /* Note, FILE_ATTR_DIRECTORY is not considered valid in NT. It is
821 reserved for the DOS SUBDIRECTORY flag. */ 810 reserved for the DOS SUBDIRECTORY flag. */
822 FILE_ATTR_ARCHIVE = const_cpu_to_le32(0x00000020), 811 FILE_ATTR_ARCHIVE = cpu_to_le32(0x00000020),
823 FILE_ATTR_DEVICE = const_cpu_to_le32(0x00000040), 812 FILE_ATTR_DEVICE = cpu_to_le32(0x00000040),
824 FILE_ATTR_NORMAL = const_cpu_to_le32(0x00000080), 813 FILE_ATTR_NORMAL = cpu_to_le32(0x00000080),
825 814
826 FILE_ATTR_TEMPORARY = const_cpu_to_le32(0x00000100), 815 FILE_ATTR_TEMPORARY = cpu_to_le32(0x00000100),
827 FILE_ATTR_SPARSE_FILE = const_cpu_to_le32(0x00000200), 816 FILE_ATTR_SPARSE_FILE = cpu_to_le32(0x00000200),
828 FILE_ATTR_REPARSE_POINT = const_cpu_to_le32(0x00000400), 817 FILE_ATTR_REPARSE_POINT = cpu_to_le32(0x00000400),
829 FILE_ATTR_COMPRESSED = const_cpu_to_le32(0x00000800), 818 FILE_ATTR_COMPRESSED = cpu_to_le32(0x00000800),
830 819
831 FILE_ATTR_OFFLINE = const_cpu_to_le32(0x00001000), 820 FILE_ATTR_OFFLINE = cpu_to_le32(0x00001000),
832 FILE_ATTR_NOT_CONTENT_INDEXED = const_cpu_to_le32(0x00002000), 821 FILE_ATTR_NOT_CONTENT_INDEXED = cpu_to_le32(0x00002000),
833 FILE_ATTR_ENCRYPTED = const_cpu_to_le32(0x00004000), 822 FILE_ATTR_ENCRYPTED = cpu_to_le32(0x00004000),
834 823
835 FILE_ATTR_VALID_FLAGS = const_cpu_to_le32(0x00007fb7), 824 FILE_ATTR_VALID_FLAGS = cpu_to_le32(0x00007fb7),
836 /* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the 825 /* Note, FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the
837 FILE_ATTR_DEVICE and preserves everything else. This mask is used 826 FILE_ATTR_DEVICE and preserves everything else. This mask is used
838 to obtain all flags that are valid for reading. */ 827 to obtain all flags that are valid for reading. */
839 FILE_ATTR_VALID_SET_FLAGS = const_cpu_to_le32(0x000031a7), 828 FILE_ATTR_VALID_SET_FLAGS = cpu_to_le32(0x000031a7),
840 /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the 829 /* Note, FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the
841 F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, 830 F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT,
842 F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask 831 F_A_COMPRESSED, and F_A_ENCRYPTED and preserves the rest. This mask
@@ -846,11 +835,11 @@ enum {
846 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION 835 * FILENAME_ATTR attributes but not in the STANDARD_INFORMATION
847 * attribute of an mft record. 836 * attribute of an mft record.
848 */ 837 */
849 FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000), 838 FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = cpu_to_le32(0x10000000),
850 /* Note, this is a copy of the corresponding bit from the mft record, 839 /* Note, this is a copy of the corresponding bit from the mft record,
851 telling us whether this is a directory or not, i.e. whether it has 840 telling us whether this is a directory or not, i.e. whether it has
852 an index root attribute or not. */ 841 an index root attribute or not. */
853 FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000), 842 FILE_ATTR_DUP_VIEW_INDEX_PRESENT = cpu_to_le32(0x20000000),
854 /* Note, this is a copy of the corresponding bit from the mft record, 843 /* Note, this is a copy of the corresponding bit from the mft record,
855 telling us whether this file has a view index present (eg. object id 844 telling us whether this file has a view index present (eg. object id
856 index, quota index, one of the security indexes or the encrypting 845 index, quota index, one of the security indexes or the encrypting
@@ -1446,42 +1435,42 @@ enum {
1446 /* Specific rights for files and directories are as follows: */ 1435 /* Specific rights for files and directories are as follows: */
1447 1436
1448 /* Right to read data from the file. (FILE) */ 1437 /* Right to read data from the file. (FILE) */
1449 FILE_READ_DATA = const_cpu_to_le32(0x00000001), 1438 FILE_READ_DATA = cpu_to_le32(0x00000001),
1450 /* Right to list contents of a directory. (DIRECTORY) */ 1439 /* Right to list contents of a directory. (DIRECTORY) */
1451 FILE_LIST_DIRECTORY = const_cpu_to_le32(0x00000001), 1440 FILE_LIST_DIRECTORY = cpu_to_le32(0x00000001),
1452 1441
1453 /* Right to write data to the file. (FILE) */ 1442 /* Right to write data to the file. (FILE) */
1454 FILE_WRITE_DATA = const_cpu_to_le32(0x00000002), 1443 FILE_WRITE_DATA = cpu_to_le32(0x00000002),
1455 /* Right to create a file in the directory. (DIRECTORY) */ 1444 /* Right to create a file in the directory. (DIRECTORY) */
1456 FILE_ADD_FILE = const_cpu_to_le32(0x00000002), 1445 FILE_ADD_FILE = cpu_to_le32(0x00000002),
1457 1446
1458 /* Right to append data to the file. (FILE) */ 1447 /* Right to append data to the file. (FILE) */
1459 FILE_APPEND_DATA = const_cpu_to_le32(0x00000004), 1448 FILE_APPEND_DATA = cpu_to_le32(0x00000004),
1460 /* Right to create a subdirectory. (DIRECTORY) */ 1449 /* Right to create a subdirectory. (DIRECTORY) */
1461 FILE_ADD_SUBDIRECTORY = const_cpu_to_le32(0x00000004), 1450 FILE_ADD_SUBDIRECTORY = cpu_to_le32(0x00000004),
1462 1451
1463 /* Right to read extended attributes. (FILE/DIRECTORY) */ 1452 /* Right to read extended attributes. (FILE/DIRECTORY) */
1464 FILE_READ_EA = const_cpu_to_le32(0x00000008), 1453 FILE_READ_EA = cpu_to_le32(0x00000008),
1465 1454
1466 /* Right to write extended attributes. (FILE/DIRECTORY) */ 1455 /* Right to write extended attributes. (FILE/DIRECTORY) */
1467 FILE_WRITE_EA = const_cpu_to_le32(0x00000010), 1456 FILE_WRITE_EA = cpu_to_le32(0x00000010),
1468 1457
1469 /* Right to execute a file. (FILE) */ 1458 /* Right to execute a file. (FILE) */
1470 FILE_EXECUTE = const_cpu_to_le32(0x00000020), 1459 FILE_EXECUTE = cpu_to_le32(0x00000020),
1471 /* Right to traverse the directory. (DIRECTORY) */ 1460 /* Right to traverse the directory. (DIRECTORY) */
1472 FILE_TRAVERSE = const_cpu_to_le32(0x00000020), 1461 FILE_TRAVERSE = cpu_to_le32(0x00000020),
1473 1462
1474 /* 1463 /*
1475 * Right to delete a directory and all the files it contains (its 1464 * Right to delete a directory and all the files it contains (its
1476 * children), even if the files are read-only. (DIRECTORY) 1465 * children), even if the files are read-only. (DIRECTORY)
1477 */ 1466 */
1478 FILE_DELETE_CHILD = const_cpu_to_le32(0x00000040), 1467 FILE_DELETE_CHILD = cpu_to_le32(0x00000040),
1479 1468
1480 /* Right to read file attributes. (FILE/DIRECTORY) */ 1469 /* Right to read file attributes. (FILE/DIRECTORY) */
1481 FILE_READ_ATTRIBUTES = const_cpu_to_le32(0x00000080), 1470 FILE_READ_ATTRIBUTES = cpu_to_le32(0x00000080),
1482 1471
1483 /* Right to change file attributes. (FILE/DIRECTORY) */ 1472 /* Right to change file attributes. (FILE/DIRECTORY) */
1484 FILE_WRITE_ATTRIBUTES = const_cpu_to_le32(0x00000100), 1473 FILE_WRITE_ATTRIBUTES = cpu_to_le32(0x00000100),
1485 1474
1486 /* 1475 /*
1487 * The standard rights (bits 16 to 23). These are independent of the 1476 * The standard rights (bits 16 to 23). These are independent of the
@@ -1489,27 +1478,27 @@ enum {
1489 */ 1478 */
1490 1479
1491 /* Right to delete the object. */ 1480 /* Right to delete the object. */
1492 DELETE = const_cpu_to_le32(0x00010000), 1481 DELETE = cpu_to_le32(0x00010000),
1493 1482
1494 /* 1483 /*
1495 * Right to read the information in the object's security descriptor, 1484 * Right to read the information in the object's security descriptor,
1496 * not including the information in the SACL, i.e. right to read the 1485 * not including the information in the SACL, i.e. right to read the
1497 * security descriptor and owner. 1486 * security descriptor and owner.
1498 */ 1487 */
1499 READ_CONTROL = const_cpu_to_le32(0x00020000), 1488 READ_CONTROL = cpu_to_le32(0x00020000),
1500 1489
1501 /* Right to modify the DACL in the object's security descriptor. */ 1490 /* Right to modify the DACL in the object's security descriptor. */
1502 WRITE_DAC = const_cpu_to_le32(0x00040000), 1491 WRITE_DAC = cpu_to_le32(0x00040000),
1503 1492
1504 /* Right to change the owner in the object's security descriptor. */ 1493 /* Right to change the owner in the object's security descriptor. */
1505 WRITE_OWNER = const_cpu_to_le32(0x00080000), 1494 WRITE_OWNER = cpu_to_le32(0x00080000),
1506 1495
1507 /* 1496 /*
1508 * Right to use the object for synchronization. Enables a process to 1497 * Right to use the object for synchronization. Enables a process to
1509 * wait until the object is in the signalled state. Some object types 1498 * wait until the object is in the signalled state. Some object types
1510 * do not support this access right. 1499 * do not support this access right.
1511 */ 1500 */
1512 SYNCHRONIZE = const_cpu_to_le32(0x00100000), 1501 SYNCHRONIZE = cpu_to_le32(0x00100000),
1513 1502
1514 /* 1503 /*
1515 * The following STANDARD_RIGHTS_* are combinations of the above for 1504 * The following STANDARD_RIGHTS_* are combinations of the above for
@@ -1517,25 +1506,25 @@ enum {
1517 */ 1506 */
1518 1507
1519 /* These are currently defined to READ_CONTROL. */ 1508 /* These are currently defined to READ_CONTROL. */
1520 STANDARD_RIGHTS_READ = const_cpu_to_le32(0x00020000), 1509 STANDARD_RIGHTS_READ = cpu_to_le32(0x00020000),
1521 STANDARD_RIGHTS_WRITE = const_cpu_to_le32(0x00020000), 1510 STANDARD_RIGHTS_WRITE = cpu_to_le32(0x00020000),
1522 STANDARD_RIGHTS_EXECUTE = const_cpu_to_le32(0x00020000), 1511 STANDARD_RIGHTS_EXECUTE = cpu_to_le32(0x00020000),
1523 1512
1524 /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */ 1513 /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */
1525 STANDARD_RIGHTS_REQUIRED = const_cpu_to_le32(0x000f0000), 1514 STANDARD_RIGHTS_REQUIRED = cpu_to_le32(0x000f0000),
1526 1515
1527 /* 1516 /*
1528 * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and 1517 * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and
1529 * SYNCHRONIZE access. 1518 * SYNCHRONIZE access.
1530 */ 1519 */
1531 STANDARD_RIGHTS_ALL = const_cpu_to_le32(0x001f0000), 1520 STANDARD_RIGHTS_ALL = cpu_to_le32(0x001f0000),
1532 1521
1533 /* 1522 /*
1534 * The access system ACL and maximum allowed access types (bits 24 to 1523 * The access system ACL and maximum allowed access types (bits 24 to
1535 * 25, bits 26 to 27 are reserved). 1524 * 25, bits 26 to 27 are reserved).
1536 */ 1525 */
1537 ACCESS_SYSTEM_SECURITY = const_cpu_to_le32(0x01000000), 1526 ACCESS_SYSTEM_SECURITY = cpu_to_le32(0x01000000),
1538 MAXIMUM_ALLOWED = const_cpu_to_le32(0x02000000), 1527 MAXIMUM_ALLOWED = cpu_to_le32(0x02000000),
1539 1528
1540 /* 1529 /*
1541 * The generic rights (bits 28 to 31). These map onto the standard and 1530 * The generic rights (bits 28 to 31). These map onto the standard and
@@ -1543,10 +1532,10 @@ enum {
1543 */ 1532 */
1544 1533
1545 /* Read, write, and execute access. */ 1534 /* Read, write, and execute access. */
1546 GENERIC_ALL = const_cpu_to_le32(0x10000000), 1535 GENERIC_ALL = cpu_to_le32(0x10000000),
1547 1536
1548 /* Execute access. */ 1537 /* Execute access. */
1549 GENERIC_EXECUTE = const_cpu_to_le32(0x20000000), 1538 GENERIC_EXECUTE = cpu_to_le32(0x20000000),
1550 1539
1551 /* 1540 /*
1552 * Write access. For files, this maps onto: 1541 * Write access. For files, this maps onto:
@@ -1555,7 +1544,7 @@ enum {
1555 * For directories, the mapping has the same numerical value. See 1544 * For directories, the mapping has the same numerical value. See
1556 * above for the descriptions of the rights granted. 1545 * above for the descriptions of the rights granted.
1557 */ 1546 */
1558 GENERIC_WRITE = const_cpu_to_le32(0x40000000), 1547 GENERIC_WRITE = cpu_to_le32(0x40000000),
1559 1548
1560 /* 1549 /*
1561 * Read access. For files, this maps onto: 1550 * Read access. For files, this maps onto:
@@ -1564,7 +1553,7 @@ enum {
1564 * For directories, the mapping has the same numerical value. See 1553
1565 * above for the descriptions of the rights granted. 1554 * above for the descriptions of the rights granted.
1566 */ 1555 */
1567 GENERIC_READ = const_cpu_to_le32(0x80000000), 1556 GENERIC_READ = cpu_to_le32(0x80000000),
1568}; 1557};
1569 1558
1570typedef le32 ACCESS_MASK; 1559typedef le32 ACCESS_MASK;
@@ -1604,8 +1593,8 @@ typedef struct {
1604 * The object ACE flags (32-bit). 1593 * The object ACE flags (32-bit).
1605 */ 1594 */
1606enum { 1595enum {
1607 ACE_OBJECT_TYPE_PRESENT = const_cpu_to_le32(1), 1596 ACE_OBJECT_TYPE_PRESENT = cpu_to_le32(1),
1608 ACE_INHERITED_OBJECT_TYPE_PRESENT = const_cpu_to_le32(2), 1597 ACE_INHERITED_OBJECT_TYPE_PRESENT = cpu_to_le32(2),
1609}; 1598};
1610 1599
1611typedef le32 OBJECT_ACE_FLAGS; 1600typedef le32 OBJECT_ACE_FLAGS;
@@ -1706,23 +1695,23 @@ typedef enum {
1706 * expressed as offsets from the beginning of the security descriptor. 1695 * expressed as offsets from the beginning of the security descriptor.
1707 */ 1696 */
1708enum { 1697enum {
1709 SE_OWNER_DEFAULTED = const_cpu_to_le16(0x0001), 1698 SE_OWNER_DEFAULTED = cpu_to_le16(0x0001),
1710 SE_GROUP_DEFAULTED = const_cpu_to_le16(0x0002), 1699 SE_GROUP_DEFAULTED = cpu_to_le16(0x0002),
1711 SE_DACL_PRESENT = const_cpu_to_le16(0x0004), 1700 SE_DACL_PRESENT = cpu_to_le16(0x0004),
1712 SE_DACL_DEFAULTED = const_cpu_to_le16(0x0008), 1701 SE_DACL_DEFAULTED = cpu_to_le16(0x0008),
1713 1702
1714 SE_SACL_PRESENT = const_cpu_to_le16(0x0010), 1703 SE_SACL_PRESENT = cpu_to_le16(0x0010),
1715 SE_SACL_DEFAULTED = const_cpu_to_le16(0x0020), 1704 SE_SACL_DEFAULTED = cpu_to_le16(0x0020),
1716 1705
1717 SE_DACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0100), 1706 SE_DACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0100),
1718 SE_SACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0200), 1707 SE_SACL_AUTO_INHERIT_REQ = cpu_to_le16(0x0200),
1719 SE_DACL_AUTO_INHERITED = const_cpu_to_le16(0x0400), 1708 SE_DACL_AUTO_INHERITED = cpu_to_le16(0x0400),
1720 SE_SACL_AUTO_INHERITED = const_cpu_to_le16(0x0800), 1709 SE_SACL_AUTO_INHERITED = cpu_to_le16(0x0800),
1721 1710
1722 SE_DACL_PROTECTED = const_cpu_to_le16(0x1000), 1711 SE_DACL_PROTECTED = cpu_to_le16(0x1000),
1723 SE_SACL_PROTECTED = const_cpu_to_le16(0x2000), 1712 SE_SACL_PROTECTED = cpu_to_le16(0x2000),
1724 SE_RM_CONTROL_VALID = const_cpu_to_le16(0x4000), 1713 SE_RM_CONTROL_VALID = cpu_to_le16(0x4000),
1725 SE_SELF_RELATIVE = const_cpu_to_le16(0x8000) 1714 SE_SELF_RELATIVE = cpu_to_le16(0x8000)
1726} __attribute__ ((__packed__)); 1715} __attribute__ ((__packed__));
1727 1716
1728typedef le16 SECURITY_DESCRIPTOR_CONTROL; 1717typedef le16 SECURITY_DESCRIPTOR_CONTROL;
@@ -1910,21 +1899,21 @@ typedef struct {
1910 * Possible flags for the volume (16-bit). 1899 * Possible flags for the volume (16-bit).
1911 */ 1900 */
1912enum { 1901enum {
1913 VOLUME_IS_DIRTY = const_cpu_to_le16(0x0001), 1902 VOLUME_IS_DIRTY = cpu_to_le16(0x0001),
1914 VOLUME_RESIZE_LOG_FILE = const_cpu_to_le16(0x0002), 1903 VOLUME_RESIZE_LOG_FILE = cpu_to_le16(0x0002),
1915 VOLUME_UPGRADE_ON_MOUNT = const_cpu_to_le16(0x0004), 1904 VOLUME_UPGRADE_ON_MOUNT = cpu_to_le16(0x0004),
1916 VOLUME_MOUNTED_ON_NT4 = const_cpu_to_le16(0x0008), 1905 VOLUME_MOUNTED_ON_NT4 = cpu_to_le16(0x0008),
1917 1906
1918 VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010), 1907 VOLUME_DELETE_USN_UNDERWAY = cpu_to_le16(0x0010),
1919 VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020), 1908 VOLUME_REPAIR_OBJECT_ID = cpu_to_le16(0x0020),
1920 1909
1921 VOLUME_CHKDSK_UNDERWAY = const_cpu_to_le16(0x4000), 1910 VOLUME_CHKDSK_UNDERWAY = cpu_to_le16(0x4000),
1922 VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000), 1911 VOLUME_MODIFIED_BY_CHKDSK = cpu_to_le16(0x8000),
1923 1912
1924 VOLUME_FLAGS_MASK = const_cpu_to_le16(0xc03f), 1913 VOLUME_FLAGS_MASK = cpu_to_le16(0xc03f),
1925 1914
1926 /* To make our life easier when checking if we must mount read-only. */ 1915 /* To make our life easier when checking if we must mount read-only. */
1927 VOLUME_MUST_MOUNT_RO_MASK = const_cpu_to_le16(0xc027), 1916 VOLUME_MUST_MOUNT_RO_MASK = cpu_to_le16(0xc027),
1928} __attribute__ ((__packed__)); 1917} __attribute__ ((__packed__));
1929 1918
1930typedef le16 VOLUME_FLAGS; 1919typedef le16 VOLUME_FLAGS;
@@ -2109,26 +2098,26 @@ typedef struct {
2109 * The user quota flags. Names explain meaning. 2098 * The user quota flags. Names explain meaning.
2110 */ 2099 */
2111enum { 2100enum {
2112 QUOTA_FLAG_DEFAULT_LIMITS = const_cpu_to_le32(0x00000001), 2101 QUOTA_FLAG_DEFAULT_LIMITS = cpu_to_le32(0x00000001),
2113 QUOTA_FLAG_LIMIT_REACHED = const_cpu_to_le32(0x00000002), 2102 QUOTA_FLAG_LIMIT_REACHED = cpu_to_le32(0x00000002),
2114 QUOTA_FLAG_ID_DELETED = const_cpu_to_le32(0x00000004), 2103 QUOTA_FLAG_ID_DELETED = cpu_to_le32(0x00000004),
2115 2104
2116 QUOTA_FLAG_USER_MASK = const_cpu_to_le32(0x00000007), 2105 QUOTA_FLAG_USER_MASK = cpu_to_le32(0x00000007),
2117 /* This is a bit mask for the user quota flags. */ 2106 /* This is a bit mask for the user quota flags. */
2118 2107
2119 /* 2108 /*
2120 * These flags are only present in the quota defaults index entry, i.e. 2109 * These flags are only present in the quota defaults index entry, i.e.
2121 * in the entry where owner_id = QUOTA_DEFAULTS_ID. 2110 * in the entry where owner_id = QUOTA_DEFAULTS_ID.
2122 */ 2111 */
2123 QUOTA_FLAG_TRACKING_ENABLED = const_cpu_to_le32(0x00000010), 2112 QUOTA_FLAG_TRACKING_ENABLED = cpu_to_le32(0x00000010),
2124 QUOTA_FLAG_ENFORCEMENT_ENABLED = const_cpu_to_le32(0x00000020), 2113 QUOTA_FLAG_ENFORCEMENT_ENABLED = cpu_to_le32(0x00000020),
2125 QUOTA_FLAG_TRACKING_REQUESTED = const_cpu_to_le32(0x00000040), 2114 QUOTA_FLAG_TRACKING_REQUESTED = cpu_to_le32(0x00000040),
2126 QUOTA_FLAG_LOG_THRESHOLD = const_cpu_to_le32(0x00000080), 2115 QUOTA_FLAG_LOG_THRESHOLD = cpu_to_le32(0x00000080),
2127 2116
2128 QUOTA_FLAG_LOG_LIMIT = const_cpu_to_le32(0x00000100), 2117 QUOTA_FLAG_LOG_LIMIT = cpu_to_le32(0x00000100),
2129 QUOTA_FLAG_OUT_OF_DATE = const_cpu_to_le32(0x00000200), 2118 QUOTA_FLAG_OUT_OF_DATE = cpu_to_le32(0x00000200),
2130 QUOTA_FLAG_CORRUPT = const_cpu_to_le32(0x00000400), 2119 QUOTA_FLAG_CORRUPT = cpu_to_le32(0x00000400),
2131 QUOTA_FLAG_PENDING_DELETES = const_cpu_to_le32(0x00000800), 2120 QUOTA_FLAG_PENDING_DELETES = cpu_to_le32(0x00000800),
2132}; 2121};
2133 2122
2134typedef le32 QUOTA_FLAGS; 2123typedef le32 QUOTA_FLAGS;
@@ -2172,9 +2161,9 @@ typedef struct {
2172 * Predefined owner_id values (32-bit). 2161 * Predefined owner_id values (32-bit).
2173 */ 2162 */
2174enum { 2163enum {
2175 QUOTA_INVALID_ID = const_cpu_to_le32(0x00000000), 2164 QUOTA_INVALID_ID = cpu_to_le32(0x00000000),
2176 QUOTA_DEFAULTS_ID = const_cpu_to_le32(0x00000001), 2165 QUOTA_DEFAULTS_ID = cpu_to_le32(0x00000001),
2177 QUOTA_FIRST_USER_ID = const_cpu_to_le32(0x00000100), 2166 QUOTA_FIRST_USER_ID = cpu_to_le32(0x00000100),
2178}; 2167};
2179 2168
2180/* 2169/*
@@ -2189,14 +2178,14 @@ typedef enum {
2189 * Index entry flags (16-bit). 2178 * Index entry flags (16-bit).
2190 */ 2179 */
2191enum { 2180enum {
2192 INDEX_ENTRY_NODE = const_cpu_to_le16(1), /* This entry contains a 2181 INDEX_ENTRY_NODE = cpu_to_le16(1), /* This entry contains a
2193 sub-node, i.e. a reference to an index block in form of 2182 sub-node, i.e. a reference to an index block in form of
2194 a virtual cluster number (see below). */ 2183 a virtual cluster number (see below). */
2195 INDEX_ENTRY_END = const_cpu_to_le16(2), /* This signifies the last 2184 INDEX_ENTRY_END = cpu_to_le16(2), /* This signifies the last
2196 entry in an index block. The index entry does not 2185 entry in an index block. The index entry does not
2197 represent a file but it can point to a sub-node. */ 2186 represent a file but it can point to a sub-node. */
2198 2187
2199 INDEX_ENTRY_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force 2188 INDEX_ENTRY_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force
2200 enum bit width to 16-bit. */ 2189 enum bit width to 16-bit. */
2201} __attribute__ ((__packed__)); 2190} __attribute__ ((__packed__));
2202 2191
@@ -2334,26 +2323,26 @@ typedef struct {
2334 * These are the predefined reparse point tags: 2323 * These are the predefined reparse point tags:
2335 */ 2324 */
2336enum { 2325enum {
2337 IO_REPARSE_TAG_IS_ALIAS = const_cpu_to_le32(0x20000000), 2326 IO_REPARSE_TAG_IS_ALIAS = cpu_to_le32(0x20000000),
2338 IO_REPARSE_TAG_IS_HIGH_LATENCY = const_cpu_to_le32(0x40000000), 2327 IO_REPARSE_TAG_IS_HIGH_LATENCY = cpu_to_le32(0x40000000),
2339 IO_REPARSE_TAG_IS_MICROSOFT = const_cpu_to_le32(0x80000000), 2328 IO_REPARSE_TAG_IS_MICROSOFT = cpu_to_le32(0x80000000),
2340 2329
2341 IO_REPARSE_TAG_RESERVED_ZERO = const_cpu_to_le32(0x00000000), 2330 IO_REPARSE_TAG_RESERVED_ZERO = cpu_to_le32(0x00000000),
2342 IO_REPARSE_TAG_RESERVED_ONE = const_cpu_to_le32(0x00000001), 2331 IO_REPARSE_TAG_RESERVED_ONE = cpu_to_le32(0x00000001),
2343 IO_REPARSE_TAG_RESERVED_RANGE = const_cpu_to_le32(0x00000001), 2332 IO_REPARSE_TAG_RESERVED_RANGE = cpu_to_le32(0x00000001),
2344 2333
2345 IO_REPARSE_TAG_NSS = const_cpu_to_le32(0x68000005), 2334 IO_REPARSE_TAG_NSS = cpu_to_le32(0x68000005),
2346 IO_REPARSE_TAG_NSS_RECOVER = const_cpu_to_le32(0x68000006), 2335 IO_REPARSE_TAG_NSS_RECOVER = cpu_to_le32(0x68000006),
2347 IO_REPARSE_TAG_SIS = const_cpu_to_le32(0x68000007), 2336 IO_REPARSE_TAG_SIS = cpu_to_le32(0x68000007),
2348 IO_REPARSE_TAG_DFS = const_cpu_to_le32(0x68000008), 2337 IO_REPARSE_TAG_DFS = cpu_to_le32(0x68000008),
2349 2338
2350 IO_REPARSE_TAG_MOUNT_POINT = const_cpu_to_le32(0x88000003), 2339 IO_REPARSE_TAG_MOUNT_POINT = cpu_to_le32(0x88000003),
2351 2340
2352 IO_REPARSE_TAG_HSM = const_cpu_to_le32(0xa8000004), 2341 IO_REPARSE_TAG_HSM = cpu_to_le32(0xa8000004),
2353 2342
2354 IO_REPARSE_TAG_SYMBOLIC_LINK = const_cpu_to_le32(0xe8000000), 2343 IO_REPARSE_TAG_SYMBOLIC_LINK = cpu_to_le32(0xe8000000),
2355 2344
2356 IO_REPARSE_TAG_VALID_VALUES = const_cpu_to_le32(0xe000ffff), 2345 IO_REPARSE_TAG_VALID_VALUES = cpu_to_le32(0xe000ffff),
2357}; 2346};
2358 2347
2359/* 2348/*
diff --git a/fs/ntfs/logfile.h b/fs/ntfs/logfile.h
index 9468e1c45ae3..b5a6f08bd35c 100644
--- a/fs/ntfs/logfile.h
+++ b/fs/ntfs/logfile.h
@@ -104,7 +104,7 @@ typedef struct {
104 * in this particular client array. Also inside the client records themselves, 104 * in this particular client array. Also inside the client records themselves,
105 * this means that there are no client records preceding or following this one. 105 * this means that there are no client records preceding or following this one.
106 */ 106 */
107#define LOGFILE_NO_CLIENT const_cpu_to_le16(0xffff) 107#define LOGFILE_NO_CLIENT cpu_to_le16(0xffff)
108#define LOGFILE_NO_CLIENT_CPU 0xffff 108#define LOGFILE_NO_CLIENT_CPU 0xffff
109 109
110/* 110/*
@@ -112,8 +112,8 @@ typedef struct {
112 * information about the log file in which they are present. 112 * information about the log file in which they are present.
113 */ 113 */
114enum { 114enum {
115 RESTART_VOLUME_IS_CLEAN = const_cpu_to_le16(0x0002), 115 RESTART_VOLUME_IS_CLEAN = cpu_to_le16(0x0002),
116 RESTART_SPACE_FILLER = const_cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */ 116 RESTART_SPACE_FILLER = cpu_to_le16(0xffff), /* gcc: Force enum bit width to 16. */
117} __attribute__ ((__packed__)); 117} __attribute__ ((__packed__));
118 118
119typedef le16 RESTART_AREA_FLAGS; 119typedef le16 RESTART_AREA_FLAGS;
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 17d32ca6bc35..23bf68453d7d 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -2839,7 +2839,7 @@ int ntfs_extent_mft_record_free(ntfs_inode *ni, MFT_RECORD *m)
2839 */ 2839 */
2840 2840
2841 /* Mark the mft record as not in use. */ 2841 /* Mark the mft record as not in use. */
2842 m->flags &= const_cpu_to_le16(~const_le16_to_cpu(MFT_RECORD_IN_USE)); 2842 m->flags &= ~MFT_RECORD_IN_USE;
2843 2843
2844 /* Increment the sequence number, skipping zero, if it is not zero. */ 2844 /* Increment the sequence number, skipping zero, if it is not zero. */
2845 old_seq_no = m->sequence_number; 2845 old_seq_no = m->sequence_number;
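
The simplified "m->flags &= ~MFT_RECORD_IN_USE" is correct because bitwise NOT commutes with byte swapping: complementing the little-endian value gives the same bytes as complementing the CPU-order value and then swapping, so the old const_cpu_to_le16(~const_le16_to_cpu(...)) round trip bought nothing. A self-contained check of the identity:

#include <assert.h>
#include <stdint.h>

static uint16_t swab16(uint16_t x)
{
        return (uint16_t)((x << 8) | (x >> 8));
}

int main(void)
{
        uint16_t flags = 0x0001;        /* MFT_RECORD_IN_USE, CPU order */

        /* NOT acts on each bit independently, so it does not matter
         * whether the bytes are swapped before or after complementing. */
        assert((uint16_t)~swab16(flags) == swab16((uint16_t)~flags));
        return 0;
}
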
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 4a46743b5077..f76951dcd4a6 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -618,7 +618,7 @@ static bool is_boot_sector_ntfs(const struct super_block *sb,
618 * many BIOSes will refuse to boot from a bootsector if the magic is 618 * many BIOSes will refuse to boot from a bootsector if the magic is
619 * incorrect, so we emit a warning. 619 * incorrect, so we emit a warning.
620 */ 620 */
621 if (!silent && b->end_of_sector_marker != const_cpu_to_le16(0xaa55)) 621 if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55))
622 ntfs_warning(sb, "Invalid end of sector marker."); 622 ntfs_warning(sb, "Invalid end of sector marker.");
623 return true; 623 return true;
624not_ntfs: 624not_ntfs:
@@ -1242,13 +1242,13 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1242 u32 *kaddr, *kend; 1242 u32 *kaddr, *kend;
1243 ntfs_name *name = NULL; 1243 ntfs_name *name = NULL;
1244 int ret = 1; 1244 int ret = 1;
1245 static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'), 1245 static const ntfschar hiberfil[13] = { cpu_to_le16('h'),
1246 const_cpu_to_le16('i'), const_cpu_to_le16('b'), 1246 cpu_to_le16('i'), cpu_to_le16('b'),
1247 const_cpu_to_le16('e'), const_cpu_to_le16('r'), 1247 cpu_to_le16('e'), cpu_to_le16('r'),
1248 const_cpu_to_le16('f'), const_cpu_to_le16('i'), 1248 cpu_to_le16('f'), cpu_to_le16('i'),
1249 const_cpu_to_le16('l'), const_cpu_to_le16('.'), 1249 cpu_to_le16('l'), cpu_to_le16('.'),
1250 const_cpu_to_le16('s'), const_cpu_to_le16('y'), 1250 cpu_to_le16('s'), cpu_to_le16('y'),
1251 const_cpu_to_le16('s'), 0 }; 1251 cpu_to_le16('s'), 0 };
1252 1252
1253 ntfs_debug("Entering."); 1253 ntfs_debug("Entering.");
1254 /* 1254 /*
@@ -1296,7 +1296,7 @@ static int check_windows_hibernation_status(ntfs_volume *vol)
1296 goto iput_out; 1296 goto iput_out;
1297 } 1297 }
1298 kaddr = (u32*)page_address(page); 1298 kaddr = (u32*)page_address(page);
1299 if (*(le32*)kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) { 1299 if (*(le32*)kaddr == cpu_to_le32(0x72626968)/*'hibr'*/) {
1300 ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is " 1300 ntfs_debug("Magic \"hibr\" found in hiberfil.sys. Windows is "
1301 "hibernated on the volume. This is the " 1301 "hibernated on the volume. This is the "
1302 "system volume."); 1302 "system volume.");
@@ -1337,12 +1337,12 @@ static bool load_and_init_quota(ntfs_volume *vol)
1337 MFT_REF mref; 1337 MFT_REF mref;
1338 struct inode *tmp_ino; 1338 struct inode *tmp_ino;
1339 ntfs_name *name = NULL; 1339 ntfs_name *name = NULL;
1340 static const ntfschar Quota[7] = { const_cpu_to_le16('$'), 1340 static const ntfschar Quota[7] = { cpu_to_le16('$'),
1341 const_cpu_to_le16('Q'), const_cpu_to_le16('u'), 1341 cpu_to_le16('Q'), cpu_to_le16('u'),
1342 const_cpu_to_le16('o'), const_cpu_to_le16('t'), 1342 cpu_to_le16('o'), cpu_to_le16('t'),
1343 const_cpu_to_le16('a'), 0 }; 1343 cpu_to_le16('a'), 0 };
1344 static ntfschar Q[3] = { const_cpu_to_le16('$'), 1344 static ntfschar Q[3] = { cpu_to_le16('$'),
1345 const_cpu_to_le16('Q'), 0 }; 1345 cpu_to_le16('Q'), 0 };
1346 1346
1347 ntfs_debug("Entering."); 1347 ntfs_debug("Entering.");
1348 /* 1348 /*
@@ -1416,16 +1416,16 @@ static bool load_and_init_usnjrnl(ntfs_volume *vol)
1416 struct page *page; 1416 struct page *page;
1417 ntfs_name *name = NULL; 1417 ntfs_name *name = NULL;
1418 USN_HEADER *uh; 1418 USN_HEADER *uh;
1419 static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'), 1419 static const ntfschar UsnJrnl[9] = { cpu_to_le16('$'),
1420 const_cpu_to_le16('U'), const_cpu_to_le16('s'), 1420 cpu_to_le16('U'), cpu_to_le16('s'),
1421 const_cpu_to_le16('n'), const_cpu_to_le16('J'), 1421 cpu_to_le16('n'), cpu_to_le16('J'),
1422 const_cpu_to_le16('r'), const_cpu_to_le16('n'), 1422 cpu_to_le16('r'), cpu_to_le16('n'),
1423 const_cpu_to_le16('l'), 0 }; 1423 cpu_to_le16('l'), 0 };
1424 static ntfschar Max[5] = { const_cpu_to_le16('$'), 1424 static ntfschar Max[5] = { cpu_to_le16('$'),
1425 const_cpu_to_le16('M'), const_cpu_to_le16('a'), 1425 cpu_to_le16('M'), cpu_to_le16('a'),
1426 const_cpu_to_le16('x'), 0 }; 1426 cpu_to_le16('x'), 0 };
1427 static ntfschar J[3] = { const_cpu_to_le16('$'), 1427 static ntfschar J[3] = { cpu_to_le16('$'),
1428 const_cpu_to_le16('J'), 0 }; 1428 cpu_to_le16('J'), 0 };
1429 1429
1430 ntfs_debug("Entering."); 1430 ntfs_debug("Entering.");
1431 /* 1431 /*
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h
index 4087fbdac327..00d8e6bd7c36 100644
--- a/fs/ntfs/usnjrnl.h
+++ b/fs/ntfs/usnjrnl.h
@@ -116,27 +116,27 @@ typedef struct {
116 * documentation: http://www.linux-ntfs.org/ 116 * documentation: http://www.linux-ntfs.org/
117 */ 117 */
118enum { 118enum {
119 USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), 119 USN_REASON_DATA_OVERWRITE = cpu_to_le32(0x00000001),
120 USN_REASON_DATA_EXTEND = const_cpu_to_le32(0x00000002), 120 USN_REASON_DATA_EXTEND = cpu_to_le32(0x00000002),
121 USN_REASON_DATA_TRUNCATION = const_cpu_to_le32(0x00000004), 121 USN_REASON_DATA_TRUNCATION = cpu_to_le32(0x00000004),
122 USN_REASON_NAMED_DATA_OVERWRITE = const_cpu_to_le32(0x00000010), 122 USN_REASON_NAMED_DATA_OVERWRITE = cpu_to_le32(0x00000010),
123 USN_REASON_NAMED_DATA_EXTEND = const_cpu_to_le32(0x00000020), 123 USN_REASON_NAMED_DATA_EXTEND = cpu_to_le32(0x00000020),
124 USN_REASON_NAMED_DATA_TRUNCATION= const_cpu_to_le32(0x00000040), 124 USN_REASON_NAMED_DATA_TRUNCATION= cpu_to_le32(0x00000040),
125 USN_REASON_FILE_CREATE = const_cpu_to_le32(0x00000100), 125 USN_REASON_FILE_CREATE = cpu_to_le32(0x00000100),
126 USN_REASON_FILE_DELETE = const_cpu_to_le32(0x00000200), 126 USN_REASON_FILE_DELETE = cpu_to_le32(0x00000200),
127 USN_REASON_EA_CHANGE = const_cpu_to_le32(0x00000400), 127 USN_REASON_EA_CHANGE = cpu_to_le32(0x00000400),
128 USN_REASON_SECURITY_CHANGE = const_cpu_to_le32(0x00000800), 128 USN_REASON_SECURITY_CHANGE = cpu_to_le32(0x00000800),
129 USN_REASON_RENAME_OLD_NAME = const_cpu_to_le32(0x00001000), 129 USN_REASON_RENAME_OLD_NAME = cpu_to_le32(0x00001000),
130 USN_REASON_RENAME_NEW_NAME = const_cpu_to_le32(0x00002000), 130 USN_REASON_RENAME_NEW_NAME = cpu_to_le32(0x00002000),
131 USN_REASON_INDEXABLE_CHANGE = const_cpu_to_le32(0x00004000), 131 USN_REASON_INDEXABLE_CHANGE = cpu_to_le32(0x00004000),
132 USN_REASON_BASIC_INFO_CHANGE = const_cpu_to_le32(0x00008000), 132 USN_REASON_BASIC_INFO_CHANGE = cpu_to_le32(0x00008000),
133 USN_REASON_HARD_LINK_CHANGE = const_cpu_to_le32(0x00010000), 133 USN_REASON_HARD_LINK_CHANGE = cpu_to_le32(0x00010000),
134 USN_REASON_COMPRESSION_CHANGE = const_cpu_to_le32(0x00020000), 134 USN_REASON_COMPRESSION_CHANGE = cpu_to_le32(0x00020000),
135 USN_REASON_ENCRYPTION_CHANGE = const_cpu_to_le32(0x00040000), 135 USN_REASON_ENCRYPTION_CHANGE = cpu_to_le32(0x00040000),
136 USN_REASON_OBJECT_ID_CHANGE = const_cpu_to_le32(0x00080000), 136 USN_REASON_OBJECT_ID_CHANGE = cpu_to_le32(0x00080000),
137 USN_REASON_REPARSE_POINT_CHANGE = const_cpu_to_le32(0x00100000), 137 USN_REASON_REPARSE_POINT_CHANGE = cpu_to_le32(0x00100000),
138 USN_REASON_STREAM_CHANGE = const_cpu_to_le32(0x00200000), 138 USN_REASON_STREAM_CHANGE = cpu_to_le32(0x00200000),
139 USN_REASON_CLOSE = const_cpu_to_le32(0x80000000), 139 USN_REASON_CLOSE = cpu_to_le32(0x80000000),
140}; 140};
141 141
142typedef le32 USN_REASON_FLAGS; 142typedef le32 USN_REASON_FLAGS;
@@ -148,9 +148,9 @@ typedef le32 USN_REASON_FLAGS;
148 * http://www.linux-ntfs.org/ 148 * http://www.linux-ntfs.org/
149 */ 149 */
150enum { 150enum {
151 USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), 151 USN_SOURCE_DATA_MANAGEMENT = cpu_to_le32(0x00000001),
152 USN_SOURCE_AUXILIARY_DATA = const_cpu_to_le32(0x00000002), 152 USN_SOURCE_AUXILIARY_DATA = cpu_to_le32(0x00000002),
153 USN_SOURCE_REPLICATION_MANAGEMENT = const_cpu_to_le32(0x00000004), 153 USN_SOURCE_REPLICATION_MANAGEMENT = cpu_to_le32(0x00000004),
154}; 154};
155 155
156typedef le32 USN_SOURCE_INFO_FLAGS; 156typedef le32 USN_SOURCE_INFO_FLAGS;
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index eea1d24713ea..b606496b72ec 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -154,8 +154,9 @@ out:
154 return ret; 154 return ret;
155} 155}
156 156
157static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) 157static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
158{ 158{
159 struct page *page = vmf->page;
159 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 160 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
160 struct buffer_head *di_bh = NULL; 161 struct buffer_head *di_bh = NULL;
161 sigset_t blocked, oldset; 162 sigset_t blocked, oldset;
@@ -196,7 +197,8 @@ out:
196 ret2 = ocfs2_vm_op_unblock_sigs(&oldset); 197 ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
197 if (ret2 < 0) 198 if (ret2 < 0)
198 mlog_errno(ret2); 199 mlog_errno(ret2);
199 200 if (ret)
201 ret = VM_FAULT_SIGBUS;
200 return ret; 202 return ret;
201} 203}
202 204
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 5d2989e9dcc1..fa678abc9db1 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -37,7 +37,7 @@ static int proc_match(int len, const char *name, struct proc_dir_entry *de)
37#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024) 37#define PROC_BLOCK_SIZE (PAGE_SIZE - 1024)
38 38
39static ssize_t 39static ssize_t
40proc_file_read(struct file *file, char __user *buf, size_t nbytes, 40__proc_file_read(struct file *file, char __user *buf, size_t nbytes,
41 loff_t *ppos) 41 loff_t *ppos)
42{ 42{
43 struct inode * inode = file->f_path.dentry->d_inode; 43 struct inode * inode = file->f_path.dentry->d_inode;
@@ -183,19 +183,47 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
183} 183}
184 184
185static ssize_t 185static ssize_t
186proc_file_read(struct file *file, char __user *buf, size_t nbytes,
187 loff_t *ppos)
188{
189 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
190 ssize_t rv = -EIO;
191
192 spin_lock(&pde->pde_unload_lock);
193 if (!pde->proc_fops) {
194 spin_unlock(&pde->pde_unload_lock);
195 return rv;
196 }
197 pde->pde_users++;
198 spin_unlock(&pde->pde_unload_lock);
199
200 rv = __proc_file_read(file, buf, nbytes, ppos);
201
202 pde_users_dec(pde);
203 return rv;
204}
205
206static ssize_t
186proc_file_write(struct file *file, const char __user *buffer, 207proc_file_write(struct file *file, const char __user *buffer,
187 size_t count, loff_t *ppos) 208 size_t count, loff_t *ppos)
188{ 209{
189 struct inode *inode = file->f_path.dentry->d_inode; 210 struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
190 struct proc_dir_entry * dp; 211 ssize_t rv = -EIO;
191 212
192 dp = PDE(inode); 213 if (pde->write_proc) {
193 214 spin_lock(&pde->pde_unload_lock);
194 if (!dp->write_proc) 215 if (!pde->proc_fops) {
195 return -EIO; 216 spin_unlock(&pde->pde_unload_lock);
217 return rv;
218 }
219 pde->pde_users++;
220 spin_unlock(&pde->pde_unload_lock);
196 221
197 /* FIXME: does this routine need ppos? probably... */ 222 /* FIXME: does this routine need ppos? probably... */
198 return dp->write_proc(file, buffer, count, dp->data); 223 rv = pde->write_proc(file, buffer, count, pde->data);
224 pde_users_dec(pde);
225 }
226 return rv;
199} 227}
200 228
201 229
@@ -307,6 +335,21 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
307/* 335/*
308 * Return an inode number between PROC_DYNAMIC_FIRST and 336 * Return an inode number between PROC_DYNAMIC_FIRST and
309 * 0xffffffff, or zero on failure. 337 * 0xffffffff, or zero on failure.
338 *
339 * Current inode allocations in the proc-fs (hex-numbers):
340 *
341 * 00000000 reserved
342 * 00000001-00000fff static entries (goners)
343 * 001 root-ino
344 *
345 * 00001000-00001fff unused
346 * 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff
347 * 80000000-efffffff unused
348 * f0000000-ffffffff dynamic entries
349 *
350 * Goal:
351 * Once we split the thing into several virtual filesystems,
352 * we will get rid of magical ranges (and this comment, BTW).
310 */ 353 */
311static unsigned int get_inode_number(void) 354static unsigned int get_inode_number(void)
312{ 355{
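The pde_users increment around __proc_file_read() (and around write_proc below it) pins the entry against removal: remove_proc_entry() clears ->proc_fops under pde_unload_lock and then sleeps until in-flight users drain, which is what the -EIO bail-out and pde_users_dec() above cooperate with. A rough sketch of that teardown side, assuming the pde_unload_completion protocol the existing machinery uses; it is not part of this hunk:

    spin_lock(&de->pde_unload_lock);
    de->proc_fops = NULL;                   /* new callers now bail with -EIO */
    if (de->pde_users > 0) {
            DECLARE_COMPLETION_ONSTACK(c);

            if (!de->pde_unload_completion)
                    de->pde_unload_completion = &c;
            spin_unlock(&de->pde_unload_lock);
            /* the last __pde_users_dec() fires this completion */
            wait_for_completion(de->pde_unload_completion);
            spin_lock(&de->pde_unload_lock);
    }
    spin_unlock(&de->pde_unload_lock);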
diff --git a/fs/proc/inode-alloc.txt b/fs/proc/inode-alloc.txt
deleted file mode 100644
index 77212f938c2c..000000000000
--- a/fs/proc/inode-alloc.txt
+++ /dev/null
@@ -1,14 +0,0 @@
1Current inode allocations in the proc-fs (hex-numbers):
2
3 00000000 reserved
4 00000001-00000fff static entries (goners)
5 001 root-ino
6
7 00001000-00001fff unused
8 0001xxxx-7fffxxxx pid-dir entries for pid 1-7fff
9 80000000-efffffff unused
10 f0000000-ffffffff dynamic entries
11
12Goal:
13 a) once we'll split the thing into several virtual filesystems we
14 will get rid of magical ranges (and this file, BTW).
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index d8bb5c671f42..d78ade305541 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -58,11 +58,8 @@ static void proc_delete_inode(struct inode *inode)
58 58
59 /* Let go of any associated proc directory entry */ 59 /* Let go of any associated proc directory entry */
60 de = PROC_I(inode)->pde; 60 de = PROC_I(inode)->pde;
61 if (de) { 61 if (de)
62 if (de->owner)
63 module_put(de->owner);
64 de_put(de); 62 de_put(de);
65 }
66 if (PROC_I(inode)->sysctl) 63 if (PROC_I(inode)->sysctl)
67 sysctl_head_put(PROC_I(inode)->sysctl); 64 sysctl_head_put(PROC_I(inode)->sysctl);
68 clear_inode(inode); 65 clear_inode(inode);
@@ -127,7 +124,7 @@ static void __pde_users_dec(struct proc_dir_entry *pde)
127 complete(pde->pde_unload_completion); 124 complete(pde->pde_unload_completion);
128} 125}
129 126
130static void pde_users_dec(struct proc_dir_entry *pde) 127void pde_users_dec(struct proc_dir_entry *pde)
131{ 128{
132 spin_lock(&pde->pde_unload_lock); 129 spin_lock(&pde->pde_unload_lock);
133 __pde_users_dec(pde); 130 __pde_users_dec(pde);
@@ -449,12 +446,9 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
449{ 446{
450 struct inode * inode; 447 struct inode * inode;
451 448
452 if (!try_module_get(de->owner))
453 goto out_mod;
454
455 inode = iget_locked(sb, ino); 449 inode = iget_locked(sb, ino);
456 if (!inode) 450 if (!inode)
457 goto out_ino; 451 return NULL;
458 if (inode->i_state & I_NEW) { 452 if (inode->i_state & I_NEW) {
459 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 453 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
460 PROC_I(inode)->fd = 0; 454 PROC_I(inode)->fd = 0;
@@ -485,16 +479,9 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
485 } 479 }
486 } 480 }
487 unlock_new_inode(inode); 481 unlock_new_inode(inode);
488 } else { 482 } else
489 module_put(de->owner);
490 de_put(de); 483 de_put(de);
491 }
492 return inode; 484 return inode;
493
494out_ino:
495 module_put(de->owner);
496out_mod:
497 return NULL;
498} 485}
499 486
500int proc_fill_super(struct super_block *s) 487int proc_fill_super(struct super_block *s)
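With openers now pinned by the pde_users counter, holding a module reference through de->owner for the whole lifetime of a proc inode is redundant (and was inherently racy, since the entry could be looked up before try_module_get() succeeded), so proc_get_inode() and proc_delete_inode() stop taking and dropping it; the reiserfs hunks below drop their procdir->owner assignments for the same reason.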
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index cd53ff838498..f6db9618a888 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -91,3 +91,4 @@ struct pde_opener {
91 int (*release)(struct inode *, struct file *); 91 int (*release)(struct inode *, struct file *);
92 struct list_head lh; 92 struct list_head lh;
93}; 93};
94void pde_users_dec(struct proc_dir_entry *pde);
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index d153946d6d15..83adcc869437 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -144,17 +144,12 @@ void proc_tty_register_driver(struct tty_driver *driver)
144{ 144{
145 struct proc_dir_entry *ent; 145 struct proc_dir_entry *ent;
146 146
147 if (!driver->ops->read_proc || !driver->driver_name || 147 if (!driver->driver_name || driver->proc_entry ||
148 driver->proc_entry) 148 !driver->ops->proc_fops)
149 return; 149 return;
150 150
151 ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver); 151 ent = proc_create_data(driver->driver_name, 0, proc_tty_driver,
152 if (!ent) 152 driver->ops->proc_fops, driver);
153 return;
154 ent->read_proc = driver->ops->read_proc;
155 ent->owner = driver->owner;
156 ent->data = driver;
157
158 driver->proc_entry = ent; 153 driver->proc_entry = ent;
159} 154}
160 155
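With ->read_proc gone from this path, a tty driver exposes its /proc file through a complete file_operations in driver->ops->proc_fops, and the driver pointer passed as proc_create_data()'s data argument comes back via PDE(inode)->data. A minimal seq_file-backed proc_fops a driver might supply (illustrative names, not from the patch):

    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>
    #include <linux/tty_driver.h>

    static int mydrv_proc_show(struct seq_file *m, void *v)
    {
            struct tty_driver *driver = m->private;  /* from PDE data */

            seq_printf(m, "driver: %s\n", driver->driver_name);
            return 0;
    }

    static int mydrv_proc_open(struct inode *inode, struct file *file)
    {
            return single_open(file, mydrv_proc_show, PDE(inode)->data);
    }

    static const struct file_operations mydrv_proc_fops = {
            .owner   = THIS_MODULE,
            .open    = mydrv_proc_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };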
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 94063840832a..b0ae0be4801f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -693,8 +693,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
693 goto out_pages; 693 goto out_pages;
694 } 694 }
695 695
696 pm.out = (u64 *)buf; 696 pm.out = (u64 __user *)buf;
697 pm.end = (u64 *)(buf + count); 697 pm.end = (u64 __user *)(buf + count);
698 698
699 pagemap_walk.pmd_entry = pagemap_pte_range; 699 pagemap_walk.pmd_entry = pagemap_pte_range;
700 pagemap_walk.pte_hole = pagemap_pte_hole; 700 pagemap_walk.pte_hole = pagemap_pte_hole;
@@ -720,9 +720,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
720 if (ret == PM_END_OF_BUFFER) 720 if (ret == PM_END_OF_BUFFER)
721 ret = 0; 721 ret = 0;
722 /* don't need mmap_sem for these, but this looks cleaner */ 722 /* don't need mmap_sem for these, but this looks cleaner */
723 *ppos += (char *)pm.out - buf; 723 *ppos += (char __user *)pm.out - buf;
724 if (!ret) 724 if (!ret)
725 ret = (char *)pm.out - buf; 725 ret = (char __user *)pm.out - buf;
726 726
727out_pages: 727out_pages:
728 for (; pagecount; pagecount--) { 728 for (; pagecount; pagecount--) {
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index df26aa88fa47..0c10a0b3f146 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -1,45 +1,43 @@
1#include <linux/fs.h>
1#include <linux/init.h> 2#include <linux/init.h>
2#include <linux/proc_fs.h> 3#include <linux/proc_fs.h>
3#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/seq_file.h>
4#include <linux/time.h> 6#include <linux/time.h>
5#include <asm/cputime.h> 7#include <asm/cputime.h>
6 8
7static int proc_calc_metrics(char *page, char **start, off_t off, 9static int uptime_proc_show(struct seq_file *m, void *v)
8 int count, int *eof, int len)
9{
10 if (len <= off + count)
11 *eof = 1;
12 *start = page + off;
13 len -= off;
14 if (len > count)
15 len = count;
16 if (len < 0)
17 len = 0;
18 return len;
19}
20
21static int uptime_read_proc(char *page, char **start, off_t off, int count,
22 int *eof, void *data)
23{ 10{
24 struct timespec uptime; 11 struct timespec uptime;
25 struct timespec idle; 12 struct timespec idle;
26 int len;
27 cputime_t idletime = cputime_add(init_task.utime, init_task.stime); 13 cputime_t idletime = cputime_add(init_task.utime, init_task.stime);
28 14
29 do_posix_clock_monotonic_gettime(&uptime); 15 do_posix_clock_monotonic_gettime(&uptime);
30 monotonic_to_bootbased(&uptime); 16 monotonic_to_bootbased(&uptime);
31 cputime_to_timespec(idletime, &idle); 17 cputime_to_timespec(idletime, &idle);
32 len = sprintf(page, "%lu.%02lu %lu.%02lu\n", 18 seq_printf(m, "%lu.%02lu %lu.%02lu\n",
33 (unsigned long) uptime.tv_sec, 19 (unsigned long) uptime.tv_sec,
34 (uptime.tv_nsec / (NSEC_PER_SEC / 100)), 20 (uptime.tv_nsec / (NSEC_PER_SEC / 100)),
35 (unsigned long) idle.tv_sec, 21 (unsigned long) idle.tv_sec,
36 (idle.tv_nsec / (NSEC_PER_SEC / 100))); 22 (idle.tv_nsec / (NSEC_PER_SEC / 100)));
37 return proc_calc_metrics(page, start, off, count, eof, len); 23 return 0;
38} 24}
39 25
26static int uptime_proc_open(struct inode *inode, struct file *file)
27{
28 return single_open(file, uptime_proc_show, NULL);
29}
30
31static const struct file_operations uptime_proc_fops = {
32 .open = uptime_proc_open,
33 .read = seq_read,
34 .llseek = seq_lseek,
35 .release = single_release,
36};
37
40static int __init proc_uptime_init(void) 38static int __init proc_uptime_init(void)
41{ 39{
42 create_proc_read_entry("uptime", 0, NULL, uptime_read_proc, NULL); 40 proc_create("uptime", 0, NULL, &uptime_proc_fops);
43 return 0; 41 return 0;
44} 42}
45module_init(proc_uptime_init); 43module_init(proc_uptime_init);
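The deleted proc_calc_metrics() was hand-rolled bookkeeping for the read_proc contract: clamping the produced length against the caller's offset and count and setting *eof. The single_open()/seq_read() pair does that bookkeeping generically, which is why the converted handler shrinks to one seq_printf() and a return 0.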
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 995ef1d6686c..ebb2c417912c 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -59,7 +59,6 @@ const struct inode_operations ramfs_file_inode_operations = {
59 */ 59 */
60int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) 60int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
61{ 61{
62 struct pagevec lru_pvec;
63 unsigned long npages, xpages, loop, limit; 62 unsigned long npages, xpages, loop, limit;
64 struct page *pages; 63 struct page *pages;
65 unsigned order; 64 unsigned order;
@@ -102,24 +101,20 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
102 memset(data, 0, newsize); 101 memset(data, 0, newsize);
103 102
104 /* attach all the pages to the inode's address space */ 103 /* attach all the pages to the inode's address space */
105 pagevec_init(&lru_pvec, 0);
106 for (loop = 0; loop < npages; loop++) { 104 for (loop = 0; loop < npages; loop++) {
107 struct page *page = pages + loop; 105 struct page *page = pages + loop;
108 106
109 ret = add_to_page_cache(page, inode->i_mapping, loop, GFP_KERNEL); 107 ret = add_to_page_cache_lru(page, inode->i_mapping, loop,
108 GFP_KERNEL);
110 if (ret < 0) 109 if (ret < 0)
111 goto add_error; 110 goto add_error;
112 111
113 if (!pagevec_add(&lru_pvec, page))
114 __pagevec_lru_add_file(&lru_pvec);
115
116 /* prevent the page from being discarded on memory pressure */ 112 /* prevent the page from being discarded on memory pressure */
117 SetPageDirty(page); 113 SetPageDirty(page);
118 114
119 unlock_page(page); 115 unlock_page(page);
120 } 116 }
121 117
122 pagevec_lru_add_file(&lru_pvec);
123 return 0; 118 return 0;
124 119
125 fsize_exceeded: 120 fsize_exceeded:
@@ -128,10 +123,8 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
128 return -EFBIG; 123 return -EFBIG;
129 124
130 add_error: 125 add_error:
131 pagevec_lru_add_file(&lru_pvec); 126 while (loop < npages)
132 page_cache_release(pages + loop); 127 __free_page(pages + loop++);
133 for (loop++; loop < npages; loop++)
134 __free_page(pages + loop);
135 return ret; 128 return ret;
136} 129}
137 130
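add_to_page_cache_lru() inserts the page into the mapping and onto the LRU in one call, replacing the open-coded pagevec batching. The reworked error path accordingly frees only the pages that never made it into the page cache: the failing one and everything after it.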
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index b7e6ac706b87..a404fb88e456 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -33,12 +33,15 @@
33#include <linux/backing-dev.h> 33#include <linux/backing-dev.h>
34#include <linux/ramfs.h> 34#include <linux/ramfs.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/parser.h>
36#include <asm/uaccess.h> 37#include <asm/uaccess.h>
37#include "internal.h" 38#include "internal.h"
38 39
39/* some random number */ 40/* some random number */
40#define RAMFS_MAGIC 0x858458f6 41#define RAMFS_MAGIC 0x858458f6
41 42
43#define RAMFS_DEFAULT_MODE 0755
44
42static const struct super_operations ramfs_ops; 45static const struct super_operations ramfs_ops;
43static const struct inode_operations ramfs_dir_inode_operations; 46static const struct inode_operations ramfs_dir_inode_operations;
44 47
@@ -158,12 +161,75 @@ static const struct inode_operations ramfs_dir_inode_operations = {
158static const struct super_operations ramfs_ops = { 161static const struct super_operations ramfs_ops = {
159 .statfs = simple_statfs, 162 .statfs = simple_statfs,
160 .drop_inode = generic_delete_inode, 163 .drop_inode = generic_delete_inode,
164 .show_options = generic_show_options,
165};
166
167struct ramfs_mount_opts {
168 umode_t mode;
169};
170
171enum {
172 Opt_mode,
173 Opt_err
174};
175
176static const match_table_t tokens = {
177 {Opt_mode, "mode=%o"},
178 {Opt_err, NULL}
179};
180
181struct ramfs_fs_info {
182 struct ramfs_mount_opts mount_opts;
161}; 183};
162 184
185static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
186{
187 substring_t args[MAX_OPT_ARGS];
188 int option;
189 int token;
190 char *p;
191
192 opts->mode = RAMFS_DEFAULT_MODE;
193
194 while ((p = strsep(&data, ",")) != NULL) {
195 if (!*p)
196 continue;
197
198 token = match_token(p, tokens, args);
199 switch (token) {
200 case Opt_mode:
201 if (match_octal(&args[0], &option))
202 return -EINVAL;
203 opts->mode = option & S_IALLUGO;
204 break;
205 default:
206 printk(KERN_ERR "ramfs: bad mount option: %s\n", p);
207 return -EINVAL;
208 }
209 }
210
211 return 0;
212}
213
163static int ramfs_fill_super(struct super_block * sb, void * data, int silent) 214static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
164{ 215{
165 struct inode * inode; 216 struct ramfs_fs_info *fsi;
166 struct dentry * root; 217 struct inode *inode = NULL;
218 struct dentry *root;
219 int err;
220
221 save_mount_options(sb, data);
222
223 fsi = kzalloc(sizeof(struct ramfs_fs_info), GFP_KERNEL);
224 if (!fsi) {
225 err = -ENOMEM;
226 goto fail;
227 }
228 sb->s_fs_info = fsi;
229
230 err = ramfs_parse_options(data, &fsi->mount_opts);
231 if (err)
232 goto fail;
167 233
168 sb->s_maxbytes = MAX_LFS_FILESIZE; 234 sb->s_maxbytes = MAX_LFS_FILESIZE;
169 sb->s_blocksize = PAGE_CACHE_SIZE; 235 sb->s_blocksize = PAGE_CACHE_SIZE;
@@ -171,17 +237,23 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
171 sb->s_magic = RAMFS_MAGIC; 237 sb->s_magic = RAMFS_MAGIC;
172 sb->s_op = &ramfs_ops; 238 sb->s_op = &ramfs_ops;
173 sb->s_time_gran = 1; 239 sb->s_time_gran = 1;
174 inode = ramfs_get_inode(sb, S_IFDIR | 0755, 0); 240 inode = ramfs_get_inode(sb, S_IFDIR | fsi->mount_opts.mode, 0);
175 if (!inode) 241 if (!inode) {
176 return -ENOMEM; 242 err = -ENOMEM;
243 goto fail;
244 }
177 245
178 root = d_alloc_root(inode); 246 root = d_alloc_root(inode);
179 if (!root) { 247 if (!root) {
180 iput(inode); 248 err = -ENOMEM;
181 return -ENOMEM; 249 goto fail;
182 } 250 }
183 sb->s_root = root; 251 sb->s_root = root;
184 return 0; 252 return 0;
253fail:
254 kfree(fsi);
255 iput(inode);
256 return err;
185} 257}
186 258
187int ramfs_get_sb(struct file_system_type *fs_type, 259int ramfs_get_sb(struct file_system_type *fs_type,
@@ -197,10 +269,16 @@ static int rootfs_get_sb(struct file_system_type *fs_type,
197 mnt); 269 mnt);
198} 270}
199 271
272static void ramfs_kill_sb(struct super_block *sb)
273{
274 kfree(sb->s_fs_info);
275 kill_litter_super(sb);
276}
277
200static struct file_system_type ramfs_fs_type = { 278static struct file_system_type ramfs_fs_type = {
201 .name = "ramfs", 279 .name = "ramfs",
202 .get_sb = ramfs_get_sb, 280 .get_sb = ramfs_get_sb,
203 .kill_sb = kill_litter_super, 281 .kill_sb = ramfs_kill_sb,
204}; 282};
205static struct file_system_type rootfs_fs_type = { 283static struct file_system_type rootfs_fs_type = {
206 .name = "rootfs", 284 .name = "rootfs",
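ramfs thereby gains its first real mount option: a match_token() table accepting mode=%o, with anything unrecognized failing the mount with -EINVAL. save_mount_options() together with .show_options = generic_show_options lets the accepted options reappear in /proc/mounts, and ramfs_kill_sb() frees the per-superblock ramfs_fs_info that kill_litter_super() knows nothing about. In use, something like mount -t ramfs -o mode=0700 ramfs /mnt now yields a root directory with mode 0700 instead of the previously hard-coded 0755. (One caveat: the fail path kfree()s fsi while sb->s_fs_info still points at it, so ramfs_kill_sb() can free it a second time; this was addressed by a follow-up fix.)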
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index d5066400638a..9229e5514a4e 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -492,7 +492,6 @@ int reiserfs_proc_info_init(struct super_block *sb)
492 spin_lock_init(&__PINFO(sb).lock); 492 spin_lock_init(&__PINFO(sb).lock);
493 REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root); 493 REISERFS_SB(sb)->procdir = proc_mkdir(b, proc_info_root);
494 if (REISERFS_SB(sb)->procdir) { 494 if (REISERFS_SB(sb)->procdir) {
495 REISERFS_SB(sb)->procdir->owner = THIS_MODULE;
496 REISERFS_SB(sb)->procdir->data = sb; 495 REISERFS_SB(sb)->procdir->data = sb;
497 add_file(sb, "version", show_version); 496 add_file(sb, "version", show_version);
498 add_file(sb, "super", show_super); 497 add_file(sb, "super", show_super);
@@ -556,9 +555,7 @@ int reiserfs_proc_info_global_init(void)
556{ 555{
557 if (proc_info_root == NULL) { 556 if (proc_info_root == NULL) {
558 proc_info_root = proc_mkdir(proc_info_root_name, NULL); 557 proc_info_root = proc_mkdir(proc_info_root_name, NULL);
559 if (proc_info_root) { 558 if (!proc_info_root) {
560 proc_info_root->owner = THIS_MODULE;
561 } else {
562 reiserfs_warning(NULL, "cannot create /proc/%s", 559 reiserfs_warning(NULL, "cannot create /proc/%s",
563 proc_info_root_name); 560 proc_info_root_name);
564 return 1; 561 return 1;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index e52743e77000..f83f52bae390 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -57,12 +57,14 @@
57/* Helpers for inode ops. We do this so that we don't have all the VFS 57/* Helpers for inode ops. We do this so that we don't have all the VFS
58 * overhead and also for proper i_mutex annotation. 58 * overhead and also for proper i_mutex annotation.
59 * dir->i_mutex must be held for all of them. */ 59 * dir->i_mutex must be held for all of them. */
60#ifdef CONFIG_REISERFS_FS_XATTR
60static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) 61static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
61{ 62{
62 BUG_ON(!mutex_is_locked(&dir->i_mutex)); 63 BUG_ON(!mutex_is_locked(&dir->i_mutex));
63 vfs_dq_init(dir); 64 vfs_dq_init(dir);
64 return dir->i_op->create(dir, dentry, mode, NULL); 65 return dir->i_op->create(dir, dentry, mode, NULL);
65} 66}
67#endif
66 68
67static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode) 69static int xattr_mkdir(struct inode *dir, struct dentry *dentry, int mode)
68{ 70{
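The new #ifdef around xattr_create() presumably just silences a defined-but-unused warning: with xattr.c built unconditionally, its only callers are compiled when CONFIG_REISERFS_FS_XATTR is set.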
diff --git a/fs/seq_file.c b/fs/seq_file.c
index a1a4cfe19210..7f40f30c55c5 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -513,7 +513,7 @@ int seq_bitmap(struct seq_file *m, const unsigned long *bits,
513} 513}
514EXPORT_SYMBOL(seq_bitmap); 514EXPORT_SYMBOL(seq_bitmap);
515 515
516int seq_bitmap_list(struct seq_file *m, unsigned long *bits, 516int seq_bitmap_list(struct seq_file *m, const unsigned long *bits,
517 unsigned int nr_bits) 517 unsigned int nr_bits)
518{ 518{
519 if (m->count < m->size) { 519 if (m->count < m->size) {
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 07703d3ff4a1..93e0c0281d45 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -234,7 +234,7 @@ static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
234 return ret; 234 return ret;
235} 235}
236 236
237static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page) 237static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
238{ 238{
239 struct file *file = vma->vm_file; 239 struct file *file = vma->vm_file;
240 struct bin_buffer *bb = file->private_data; 240 struct bin_buffer *bb = file->private_data;
@@ -242,15 +242,15 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct page *page)
242 int ret; 242 int ret;
243 243
244 if (!bb->vm_ops) 244 if (!bb->vm_ops)
245 return -EINVAL; 245 return VM_FAULT_SIGBUS;
246 246
247 if (!bb->vm_ops->page_mkwrite) 247 if (!bb->vm_ops->page_mkwrite)
248 return 0; 248 return 0;
249 249
250 if (!sysfs_get_active_two(attr_sd)) 250 if (!sysfs_get_active_two(attr_sd))
251 return -EINVAL; 251 return VM_FAULT_SIGBUS;
252 252
253 ret = bb->vm_ops->page_mkwrite(vma, page); 253 ret = bb->vm_ops->page_mkwrite(vma, vmf);
254 254
255 sysfs_put_active_two(attr_sd); 255 sysfs_put_active_two(attr_sd);
256 return ret; 256 return ret;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 93b6de51f261..0ff89fe71e51 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1434,8 +1434,9 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1434 * mmap()d file has taken write protection fault and is being made 1434 * mmap()d file has taken write protection fault and is being made
1435 * writable. UBIFS must ensure page is budgeted for. 1435 * writable. UBIFS must ensure page is budgeted for.
1436 */ 1436 */
1437static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) 1437static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1438{ 1438{
1439 struct page *page = vmf->page;
1439 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 1440 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1440 struct ubifs_info *c = inode->i_sb->s_fs_info; 1441 struct ubifs_info *c = inode->i_sb->s_fs_info;
1441 struct timespec now = ubifs_current_time(inode); 1442 struct timespec now = ubifs_current_time(inode);
@@ -1447,7 +1448,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1447 ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); 1448 ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
1448 1449
1449 if (unlikely(c->ro_media)) 1450 if (unlikely(c->ro_media))
1450 return -EROFS; 1451 return VM_FAULT_SIGBUS; /* -EROFS */
1451 1452
1452 /* 1453 /*
1453 * We have not locked @page so far so we may budget for changing the 1454 * We have not locked @page so far so we may budget for changing the
@@ -1480,7 +1481,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1480 if (err == -ENOSPC) 1481 if (err == -ENOSPC)
1481 ubifs_warn("out of space for mmapped file " 1482 ubifs_warn("out of space for mmapped file "
1482 "(inode number %lu)", inode->i_ino); 1483 "(inode number %lu)", inode->i_ino);
1483 return err; 1484 return VM_FAULT_SIGBUS;
1484 } 1485 }
1485 1486
1486 lock_page(page); 1487 lock_page(page);
@@ -1520,6 +1521,8 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1520out_unlock: 1521out_unlock:
1521 unlock_page(page); 1522 unlock_page(page);
1522 ubifs_release_budget(c, &req); 1523 ubifs_release_budget(c, &req);
1524 if (err)
1525 err = VM_FAULT_SIGBUS;
1523 return err; 1526 return err;
1524} 1527}
1525 1528
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index e14c4e3aea0c..f4e255441574 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -234,9 +234,9 @@ xfs_file_mmap(
234STATIC int 234STATIC int
235xfs_vm_page_mkwrite( 235xfs_vm_page_mkwrite(
236 struct vm_area_struct *vma, 236 struct vm_area_struct *vma,
237 struct page *page) 237 struct vm_fault *vmf)
238{ 238{
239 return block_page_mkwrite(vma, page, xfs_get_blocks); 239 return block_page_mkwrite(vma, vmf, xfs_get_blocks);
240} 240}
241 241
242const struct file_operations xfs_file_operations = { 242const struct file_operations xfs_file_operations = {
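The xfs handler stays a one-line wrapper because the generic block_page_mkwrite() helper in fs/buffer.c evidently follows the same new convention, taking the vm_fault and mapping its internal -errno results onto VM_FAULT_* codes, so trivial users simply forward their arguments.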