Automerge with /usr/src/ntfs-2.6.git.

author: Anton Altaparmakov <aia21@cantab.net> 2005-06-25 09:27:27 -0400
committer: Anton Altaparmakov <aia21@cantab.net> 2005-06-25 09:27:27 -0400
commit: 38b22b6e9f46ab8f73ef5734f0e0a000766a9258 (patch)
tree: 2ccc41ef55918d3af43e444bde7648562a031559 /fs
parent: 3357d4c75f1fb67e7304998c4ad4e9a9fed66fa4 (diff)
parent: b3e112bcc19abd8e9657dca34a87316786e096f3 (diff)
68 files changed, 1833 insertions, 1019 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index a7c0cc3203cb..8157f2e2d515 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -50,6 +50,23 @@ config EXT2_FS_SECURITY
          If you are not using a security module that requires using
          extended attributes for file security labels, say N.
+config EXT2_FS_XIP
+        bool "Ext2 execute in place support"
+        depends on EXT2_FS
+        help
+          Execute in place can be used on memory-backed block devices. If you
+          enable this option, you can choose to mount block devices that
+          support this feature without going through the page cache.
+          If you do not use a block device that supports execute in place,
+          or if unsure, say N.
+config FS_XIP
+# execute in place
+        bool
+        depends on EXT2_FS_XIP
+        default y
 config EXT3_FS
        tristate "Ext3 journalling file system support"
        help
@@ -1413,6 +1430,8 @@ config NFSD_V4
        bool "Provide NFSv4 server support (EXPERIMENTAL)"
        depends on NFSD_V3 && EXPERIMENTAL
        select NFSD_TCP
+        select CRYPTO_MD5
+        select CRYPTO
        help
          If you would like to include the NFSv4 server as well as the NFSv2
          and NFSv3 servers, say Y here.  This feature is experimental, and
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c0cbd1bc1a02..e0df94c37b7e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -777,8 +777,7 @@ static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
        return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
 }
-static int block_ioctl(struct inode *inode, struct file *file, unsigned cmd,
+static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
-                        unsigned long arg)
 {
        return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
 }
@@ -803,7 +802,7 @@ struct file_operations def_blk_fops = {
        .aio_write      = blkdev_file_aio_write, 
        .mmap           = generic_file_mmap,
        .fsync          = block_fsync,
-        .ioctl          = block_ioctl,
+        .unlocked_ioctl = block_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = compat_blkdev_ioctl,
 #endif
diff --git a/fs/buffer.c b/fs/buffer.c
index 0befa724ab98..13e5938a64f6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -331,7 +331,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
        return ret;
 }
-asmlinkage long sys_fsync(unsigned int fd)
+static long do_fsync(unsigned int fd, int datasync)
 {
        struct file * file;
        struct address_space *mapping;
@@ -342,14 +342,14 @@ asmlinkage long sys_fsync(unsigned int fd)
        if (!file)
                goto out;
-        mapping = file->f_mapping;
        ret = -EINVAL;
        if (!file->f_op || !file->f_op->fsync) {
                /* Why?  We can still call filemap_fdatawrite */
                goto out_putf;
        }
+        mapping = file->f_mapping;
        current->flags |= PF_SYNCWRITE;
        ret = filemap_fdatawrite(mapping);
@@ -358,7 +358,7 @@ asmlinkage long sys_fsync(unsigned int fd)
         * which could cause livelocks in fsync_buffers_list
         */
        down(&mapping->host->i_sem);
-        err = file->f_op->fsync(file, file->f_dentry, 0);
+        err = file->f_op->fsync(file, file->f_dentry, datasync);
        if (!ret)
                ret = err;
        up(&mapping->host->i_sem);
@@ -373,39 +373,14 @@ out:
        return ret;
 }
-asmlinkage long sys_fdatasync(unsigned int fd)
+asmlinkage long sys_fsync(unsigned int fd)
 {
-        struct file * file;
+        return do_fsync(fd, 0);
-        struct address_space *mapping;
+}
-        int ret, err;
-        ret = -EBADF;
-        file = fget(fd);
-        if (!file)
-                goto out;
-        ret = -EINVAL;
-        if (!file->f_op || !file->f_op->fsync)
-                goto out_putf;
-        mapping = file->f_mapping;
-        current->flags |= PF_SYNCWRITE;
-        ret = filemap_fdatawrite(mapping);
-        down(&mapping->host->i_sem);
-        err = file->f_op->fsync(file, file->f_dentry, 1);
-        if (!ret)
-                ret = err;
-        up(&mapping->host->i_sem);
-        err = filemap_fdatawait(mapping);
-        if (!ret)
-                ret = err;
-        current->flags &= ~PF_SYNCWRITE;
-out_putf:
+asmlinkage long sys_fdatasync(unsigned int fd)
-        fput(file);
+{
-out:
+        return do_fsync(fd, 1);
-        return ret;
 }
 /*
@@ -1951,7 +1926,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
                        if (err)
                                break;
                        if (buffer_new(bh)) {
-                                clear_buffer_new(bh);
                                unmap_underlying_metadata(bh->b_bdev,
                                                        bh->b_blocknr);
                                if (PageUptodate(page)) {
@@ -1993,9 +1967,14 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
                if (!buffer_uptodate(*wait_bh))
                        err = -EIO;
        }
-        if (!err)
+        if (!err) {
-                return err;
+                bh = head;
+                do {
+                        if (buffer_new(bh))
+                                clear_buffer_new(bh);
+                } while ((bh = bh->b_this_page) != head);
+                return 0;
+        }
        /* Error case: */
        /*
         * Zero out any newly allocated blocks to avoid exposing stale
diff --git a/fs/char_dev.c b/fs/char_dev.c
index c1e3537909fc..e82aac9cc2f5 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -56,10 +56,21 @@ int get_chrdev_list(char *page)
        down(&chrdevs_lock);
        for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) {
-                for (cd = chrdevs[i]; cd; cd = cd->next)
+                for (cd = chrdevs[i]; cd; cd = cd->next) {
+                        /*
+                         * If the current name, plus the 5 extra characters
+                         * in the device line for this entry ("%3d", a space
+                         * and a newline), would run us off the page, we're done.
+                         */
+                        if ((len+strlen(cd->name) + 5) >= PAGE_SIZE)
+                                goto page_full;
                        len += sprintf(page+len, "%3d %s\n",
                                       cd->major, cd->name);
+                }
        }
+page_full:
        up(&chrdevs_lock);
        return len;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b529786699e7..a86ac4aeaedb 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -110,16 +110,6 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
        return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
 }
-static struct dentry * get_dentry(struct dentry *parent, const char *name)
-{               
-        struct qstr qstr;
-        qstr.name = name;
-        qstr.len = strlen(name);
-        qstr.hash = full_name_hash(name,qstr.len);
-        return lookup_hash(&qstr,parent);
-}               
 static struct super_block *debug_get_sb(struct file_system_type *fs_type,
                                        int flags, const char *dev_name,
                                        void *data)
@@ -157,7 +147,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
        *dentry = NULL;
        down(&parent->d_inode->i_sem);
-        *dentry = get_dentry (parent, name);
+        *dentry = lookup_one_len(name, parent, strlen(name));
        if (!IS_ERR(dentry)) {
                if ((mode & S_IFMT) == S_IFDIR)
                        error = debugfs_mkdir(parent->d_inode, *dentry, mode);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1d55e7e67342..0d06097bc995 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -215,7 +215,7 @@ static struct page *dio_get_page(struct dio *dio)
 static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
 {
        if (dio->end_io && dio->result)
-                dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
+                dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
        if (dio->lock_type == DIO_LOCKING)
                up_read(&dio->inode->i_alloc_sem);
 }
diff --git a/fs/dquot.c b/fs/dquot.c
index 3995ce7907cc..37212b039a4a 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1519,14 +1519,22 @@ out_path:
 * This function is used when filesystem needs to initialize quotas
 * during mount time.
 */
-int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry)
+int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
+                int format_id, int type)
 {
+        struct dentry *dentry;
        int error;
+        dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
+        if (IS_ERR(dentry))
+                return PTR_ERR(dentry);
        error = security_quota_on(dentry);
-        if (error)
+        if (!error)
-                return error;
+                error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
-        return vfs_quota_on_inode(dentry->d_inode, type, format_id);
+        dput(dentry);
+        return error;
 }
 /* Generic routine for getting common part of quota structure */
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9900e333655a..6ab1dd0ca904 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -101,57 +101,6 @@
 /* Maximum number of poll wake up nests we are allowing */
 #define EP_MAX_POLLWAKE_NESTS 4
-/* Macro to allocate a "struct epitem" from the slab cache */
-#define EPI_MEM_ALLOC() (struct epitem *) kmem_cache_alloc(epi_cache, SLAB_KERNEL)
-/* Macro to free a "struct epitem" to the slab cache */
-#define EPI_MEM_FREE(p) kmem_cache_free(epi_cache, p)
-/* Macro to allocate a "struct eppoll_entry" from the slab cache */
-#define PWQ_MEM_ALLOC() (struct eppoll_entry *) kmem_cache_alloc(pwq_cache, SLAB_KERNEL)
-/* Macro to free a "struct eppoll_entry" to the slab cache */
-#define PWQ_MEM_FREE(p) kmem_cache_free(pwq_cache, p)
-/* Fast test to see if the file is an evenpoll file */
-#define IS_FILE_EPOLL(f) ((f)->f_op == &eventpoll_fops)
-/* Setup the structure that is used as key for the rb-tree */
-#define EP_SET_FFD(p, f, d) do { (p)->file = (f); (p)->fd = (d); } while (0)
-/* Compare rb-tree keys */
-#define EP_CMP_FFD(p1, p2) ((p1)->file > (p2)->file ? +1: \
-                            ((p1)->file < (p2)->file ? -1: (p1)->fd - (p2)->fd))
-/* Special initialization for the rb-tree node to detect linkage */
-#define EP_RB_INITNODE(n) (n)->rb_parent = (n)
-/* Removes a node from the rb-tree and marks it for a fast is-linked check */
-#define EP_RB_ERASE(n, r) do { rb_erase(n, r); (n)->rb_parent = (n); } while (0)
-/* Fast check to verify that the item is linked to the main rb-tree */
-#define EP_RB_LINKED(n) ((n)->rb_parent != (n))
-/*
- * Remove the item from the list and perform its initialization.
- * This is useful for us because we can test if the item is linked
- * using "EP_IS_LINKED(p)".
- */
-#define EP_LIST_DEL(p) do { list_del(p); INIT_LIST_HEAD(p); } while (0)
-/* Tells us if the item is currently linked */
-#define EP_IS_LINKED(p) (!list_empty(p))
-/* Get the "struct epitem" from a wait queue pointer */
-#define EP_ITEM_FROM_WAIT(p) ((struct epitem *) container_of(p, struct eppoll_entry, wait)->base)
-/* Get the "struct epitem" from an epoll queue wrapper */
-#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi)
-/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
-#define EP_OP_HASH_EVENT(op) ((op) != EPOLL_CTL_DEL)
 struct epoll_filefd {
        struct file *file;
        int fd;
@@ -357,6 +306,82 @@ static struct dentry_operations eventpollfs_dentry_operations = {
+/* Fast test to see if the file is an eventpoll file */
+static inline int is_file_epoll(struct file *f)
+{
+        return f->f_op == &eventpoll_fops;
+}
+/* Setup the structure that is used as key for the rb-tree */
+static inline void ep_set_ffd(struct epoll_filefd *ffd,
+                              struct file *file, int fd)
+{
+        ffd->file = file;
+        ffd->fd = fd;
+}
+/* Compare rb-tree keys */
+static inline int ep_cmp_ffd(struct epoll_filefd *p1,
+                             struct epoll_filefd *p2)
+{
+        return (p1->file > p2->file ? +1:
+                (p1->file < p2->file ? -1 : p1->fd - p2->fd));
+}
+/* Special initialization for the rb-tree node to detect linkage */
+static inline void ep_rb_initnode(struct rb_node *n)
+{
+        n->rb_parent = n;
+}
+/* Removes a node from the rb-tree and marks it for a fast is-linked check */
+static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
+{
+        rb_erase(n, r);
+        n->rb_parent = n;
+}
+/* Fast check to verify that the item is linked to the main rb-tree */
+static inline int ep_rb_linked(struct rb_node *n)
+{
+        return n->rb_parent != n;
+}
+/*
+ * Remove the item from the list and perform its initialization.
+ * This is useful for us because we can test if the item is linked
+ * using "ep_is_linked(p)".
+ */
+static inline void ep_list_del(struct list_head *p)
+{
+        list_del(p);
+        INIT_LIST_HEAD(p);
+}
+/* Tells us if the item is currently linked */
+static inline int ep_is_linked(struct list_head *p)
+{
+        return !list_empty(p);
+}
+/* Get the "struct epitem" from a wait queue pointer */
+static inline struct epitem * ep_item_from_wait(wait_queue_t *p)
+{
+        return container_of(p, struct eppoll_entry, wait)->base;
+}
+/* Get the "struct epitem" from an epoll queue wrapper */
+static inline struct epitem * ep_item_from_epqueue(poll_table *p)
+{
+        return container_of(p, struct ep_pqueue, pt)->epi;
+}
+/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
+static inline int ep_op_hash_event(int op)
+{
+        return op != EPOLL_CTL_DEL;
+}
 /* Initialize the poll safe wake up structure */
 static void ep_poll_safewake_init(struct poll_safewake *psw)
 {
@@ -456,7 +481,7 @@ void eventpoll_release_file(struct file *file)
                epi = list_entry(lsthead->next, struct epitem, fllink);
                ep = epi->ep;
-                EP_LIST_DEL(&epi->fllink);
+                ep_list_del(&epi->fllink);
                down_write(&ep->sem);
                ep_remove(ep, epi);
                up_write(&ep->sem);
@@ -534,7 +559,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
                     current, epfd, op, fd, event));
        error = -EFAULT;
-        if (EP_OP_HASH_EVENT(op) &&
+        if (ep_op_hash_event(op) &&
            copy_from_user(&epds, event, sizeof(struct epoll_event)))
                goto eexit_1;
@@ -560,7 +585,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
         * adding an epoll file descriptor inside itself.
         */
        error = -EINVAL;
-        if (file == tfile || !IS_FILE_EPOLL(file))
+        if (file == tfile || !is_file_epoll(file))
                goto eexit_3;
        /*
@@ -656,7 +681,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
         * the user passed to us _is_ an eventpoll file.
         */
        error = -EINVAL;
-        if (!IS_FILE_EPOLL(file))
+        if (!is_file_epoll(file))
                goto eexit_2;
        /*
@@ -831,11 +856,11 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
        struct epitem *epi, *epir = NULL;
        struct epoll_filefd ffd;
-        EP_SET_FFD(&ffd, file, fd);
+        ep_set_ffd(&ffd, file, fd);
        read_lock_irqsave(&ep->lock, flags);
        for (rbp = ep->rbr.rb_node; rbp; ) {
                epi = rb_entry(rbp, struct epitem, rbn);
-                kcmp = EP_CMP_FFD(&ffd, &epi->ffd);
+                kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
                if (kcmp > 0)
                        rbp = rbp->rb_right;
                else if (kcmp < 0)
@@ -875,7 +900,7 @@ static void ep_release_epitem(struct epitem *epi)
 {
        if (atomic_dec_and_test(&epi->usecnt))
-                EPI_MEM_FREE(epi);
+                kmem_cache_free(epi_cache, epi);
 }
@@ -886,10 +911,10 @@ static void ep_release_epitem(struct epitem *epi)
 static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
                                 poll_table *pt)
 {
-        struct epitem *epi = EP_ITEM_FROM_EPQUEUE(pt);
+        struct epitem *epi = ep_item_from_epqueue(pt);
        struct eppoll_entry *pwq;
-        if (epi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC())) {
+        if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) {
                init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
                pwq->whead = whead;
                pwq->base = epi;
@@ -912,7 +937,7 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
        while (*p) {
                parent = *p;
                epic = rb_entry(parent, struct epitem, rbn);
-                kcmp = EP_CMP_FFD(&epi->ffd, &epic->ffd);
+                kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
                if (kcmp > 0)
                        p = &parent->rb_right;
                else
@@ -932,17 +957,17 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
        struct ep_pqueue epq;
        error = -ENOMEM;
-        if (!(epi = EPI_MEM_ALLOC()))
+        if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL)))
                goto eexit_1;
        /* Item initialization follow here ... */
-        EP_RB_INITNODE(&epi->rbn);
+        ep_rb_initnode(&epi->rbn);
        INIT_LIST_HEAD(&epi->rdllink);
        INIT_LIST_HEAD(&epi->fllink);
        INIT_LIST_HEAD(&epi->txlink);
        INIT_LIST_HEAD(&epi->pwqlist);
        epi->ep = ep;
-        EP_SET_FFD(&epi->ffd, tfile, fd);
+        ep_set_ffd(&epi->ffd, tfile, fd);
        epi->event = *event;
        atomic_set(&epi->usecnt, 1);
        epi->nwait = 0;
@@ -978,7 +1003,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
        ep_rbtree_insert(ep, epi);
        /* If the file is already "ready" we drop it inside the ready list */
-        if ((revents & event->events) && !EP_IS_LINKED(&epi->rdllink)) {
+        if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
                list_add_tail(&epi->rdllink, &ep->rdllist);
                /* Notify waiting tasks that events are available */
@@ -1007,11 +1032,11 @@ eexit_2:
         * allocated wait queue.
         */
        write_lock_irqsave(&ep->lock, flags);
-        if (EP_IS_LINKED(&epi->rdllink))
+        if (ep_is_linked(&epi->rdllink))
-                EP_LIST_DEL(&epi->rdllink);
+                ep_list_del(&epi->rdllink);
        write_unlock_irqrestore(&ep->lock, flags);
-        EPI_MEM_FREE(epi);
+        kmem_cache_free(epi_cache, epi);
 eexit_1:
        return error;
 }
@@ -1050,14 +1075,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
         * If the item is not linked to the hash it means that it's on its
         * way toward the removal. Do nothing in this case.
         */
-        if (EP_RB_LINKED(&epi->rbn)) {
+        if (ep_rb_linked(&epi->rbn)) {
                /*
                 * If the item is "hot" and it is not registered inside the ready
                 * list, push it inside. If the item is not "hot" and it is currently
                 * registered inside the ready list, unlink it.
                 */
                if (revents & event->events) {
-                        if (!EP_IS_LINKED(&epi->rdllink)) {
+                        if (!ep_is_linked(&epi->rdllink)) {
                                list_add_tail(&epi->rdllink, &ep->rdllist);
                                /* Notify waiting tasks that events are available */
@@ -1097,9 +1122,9 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
                while (!list_empty(lsthead)) {
                        pwq = list_entry(lsthead->next, struct eppoll_entry, llink);
-                        EP_LIST_DEL(&pwq->llink);
+                        ep_list_del(&pwq->llink);
                        remove_wait_queue(pwq->whead, &pwq->wait);
-                        PWQ_MEM_FREE(pwq);
+                        kmem_cache_free(pwq_cache, pwq);
                }
        }
 }
@@ -1118,7 +1143,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
         * The check protect us from doing a double unlink ( crash ).
         */
        error = -ENOENT;
-        if (!EP_RB_LINKED(&epi->rbn))
+        if (!ep_rb_linked(&epi->rbn))
                goto eexit_1;
        /*
@@ -1133,14 +1158,14 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
         * This operation togheter with the above check closes the door to
         * double unlinks.
         */
-        EP_RB_ERASE(&epi->rbn, &ep->rbr);
+        ep_rb_erase(&epi->rbn, &ep->rbr);
        /*
         * If the item we are going to remove is inside the ready file descriptors
         * we want to remove it from this list to avoid stale events.
         */
-        if (EP_IS_LINKED(&epi->rdllink))
+        if (ep_is_linked(&epi->rdllink))
-                EP_LIST_DEL(&epi->rdllink);
+                ep_list_del(&epi->rdllink);
        error = 0;
 eexit_1:
@@ -1174,8 +1199,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
        /* Remove the current item from the list of epoll hooks */
        spin_lock(&file->f_ep_lock);
-        if (EP_IS_LINKED(&epi->fllink))
+        if (ep_is_linked(&epi->fllink))
-                EP_LIST_DEL(&epi->fllink);
+                ep_list_del(&epi->fllink);
        spin_unlock(&file->f_ep_lock);
        /* We need to acquire the write IRQ lock before calling ep_unlink() */
@@ -1210,7 +1235,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 {
        int pwake = 0;
        unsigned long flags;
-        struct epitem *epi = EP_ITEM_FROM_WAIT(wait);
+        struct epitem *epi = ep_item_from_wait(wait);
        struct eventpoll *ep = epi->ep;
        DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
@@ -1228,7 +1253,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
                goto is_disabled;
        /* If this file is already in the ready list we exit soon */
-        if (EP_IS_LINKED(&epi->rdllink))
+        if (ep_is_linked(&epi->rdllink))
                goto is_linked;
        list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -1307,7 +1332,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
                lnk = lnk->next;
                /* If this file is already in the ready list we exit soon */
-                if (!EP_IS_LINKED(&epi->txlink)) {
+                if (!ep_is_linked(&epi->txlink)) {
                        /*
                         * This is initialized in this way so that the default
                         * behaviour of the reinjecting code will be to push back
@@ -1322,7 +1347,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
                        /*
                         * Unlink the item from the ready list.
                         */
-                        EP_LIST_DEL(&epi->rdllink);
+                        ep_list_del(&epi->rdllink);
                }
        }
@@ -1401,7 +1426,7 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
                epi = list_entry(txlist->next, struct epitem, txlink);
                /* Unlink the current item from the transfer list */
-                EP_LIST_DEL(&epi->txlink);
+                ep_list_del(&epi->txlink);
                /*
                 * If the item is no more linked to the interest set, we don't
@@ -1410,8 +1435,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
                 * item is set to have an Edge Triggered behaviour, we don't have
                 * to push it back either.
                 */
-                if (EP_RB_LINKED(&epi->rbn) && !(epi->event.events & EPOLLET) &&
+                if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) &&
-                    (epi->revents & epi->event.events) && !EP_IS_LINKED(&epi->rdllink)) {
+                    (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) {
                        list_add_tail(&epi->rdllink, &ep->rdllist);
                        ricnt++;
                }
diff --git a/fs/exec.c b/fs/exec.c
index 3a4b35a14c0d..48871917d363 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -58,6 +58,9 @@
 int core_uses_pid;
 char core_pattern[65] = "core";
+int suid_dumpable = 0;
+EXPORT_SYMBOL(suid_dumpable);
 /* The maximal length of core_pattern is also specified in sysctl.c */
 static struct linux_binfmt *formats;
@@ -864,6 +867,9 @@ int flush_old_exec(struct linux_binprm * bprm)
        if (current->euid == current->uid && current->egid == current->gid)
                current->mm->dumpable = 1;
+        else
+                current->mm->dumpable = suid_dumpable;
        name = bprm->filename;
        /* Copies the binary name from after last slash */
@@ -884,7 +890,7 @@ int flush_old_exec(struct linux_binprm * bprm)
            permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) ||
            (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
                suid_keys(current);
-                current->mm->dumpable = 0;
+                current->mm->dumpable = suid_dumpable;
        }
        /* An exec changes our domain. We are no longer part of the thread
@@ -1432,6 +1438,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        struct inode * inode;
        struct file * file;
        int retval = 0;
+        int fsuid = current->fsuid;
+        int flag = 0;
        binfmt = current->binfmt;
        if (!binfmt || !binfmt->core_dump)
@@ -1441,6 +1449,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
                up_write(&mm->mmap_sem);
                goto fail;
        }
+        /*
+         *      We cannot trust fsuid as being the "true" uid of the
+         *      process nor do we know its entire history. We only know it
+         *      was tainted so we dump it as root in mode 2.
+         */
+        if (mm->dumpable == 2) {        /* Setuid core dump mode */
+                flag = O_EXCL;          /* Stop rewrite attacks */
+                current->fsuid = 0;     /* Dump root private */
+        }
        mm->dumpable = 0;
        init_completion(&mm->core_done);
        spin_lock_irq(&current->sighand->siglock);
@@ -1466,7 +1484,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        lock_kernel();
        format_corename(corename, core_pattern, signr);
        unlock_kernel();
-        file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600);
+        file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600);
        if (IS_ERR(file))
                goto fail_unlock;
        inode = file->f_dentry->d_inode;
@@ -1491,6 +1509,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 close_fail:
        filp_close(file, NULL);
 fail_unlock:
+        current->fsuid = fsuid;
        complete_all(&mm->core_done);
 fail:
        return retval;
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index ee240a14e70f..c5d02da73bc3 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -10,3 +10,4 @@ ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
 ext2-$(CONFIG_EXT2_FS_XATTR)     += xattr.o xattr_user.o xattr_trusted.o
 ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
 ext2-$(CONFIG_EXT2_FS_SECURITY)  += xattr_security.o
+ext2-$(CONFIG_EXT2_FS_XIP)       += xip.o
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 25f4a64fd6bc..213148c36ebe 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -396,12 +396,12 @@ static size_t
 ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
                           const char *name, size_t name_len)
 {
-        const size_t size = sizeof(XATTR_NAME_ACL_ACCESS);
+        const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
        if (!test_opt(inode->i_sb, POSIX_ACL))
                return 0;
        if (list && size <= list_size)
-                memcpy(list, XATTR_NAME_ACL_ACCESS, size);
+                memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
        return size;
 }
@@ -409,12 +409,12 @@ static size_t
 ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
                            const char *name, size_t name_len)
 {
-        const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT);
+        const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
        if (!test_opt(inode->i_sb, POSIX_ACL))
                return 0;
        if (list && size <= list_size)
-                memcpy(list, XATTR_NAME_ACL_DEFAULT, size);
+                memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
        return size;
 }
@@ -506,14 +506,14 @@ ext2_xattr_set_acl_default(struct inode *inode, const char *name,
 }
 struct xattr_handler ext2_xattr_acl_access_handler = {
-        .prefix = XATTR_NAME_ACL_ACCESS,
+        .prefix = POSIX_ACL_XATTR_ACCESS,
        .list   = ext2_xattr_list_acl_access,
        .get    = ext2_xattr_get_acl_access,
        .set    = ext2_xattr_set_acl_access,
 };
 struct xattr_handler ext2_xattr_acl_default_handler = {
-        .prefix = XATTR_NAME_ACL_DEFAULT,
+        .prefix = POSIX_ACL_XATTR_DEFAULT,
        .list   = ext2_xattr_list_acl_default,
        .get    = ext2_xattr_get_acl_default,
        .set    = ext2_xattr_set_acl_default,
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index fed96ae81a7d..0bde85bafe38 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -4,7 +4,7 @@
  (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 #define EXT2_ACL_VERSION        0x0001
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 8f0fd726c3f1..eed521d22cf0 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -147,9 +147,11 @@ extern struct file_operations ext2_dir_operations;
 /* file.c */
 extern struct inode_operations ext2_file_inode_operations;
 extern struct file_operations ext2_file_operations;
+extern struct file_operations ext2_xip_file_operations;
 /* inode.c */
 extern struct address_space_operations ext2_aops;
+extern struct address_space_operations ext2_aops_xip;
 extern struct address_space_operations ext2_nobh_aops;
 /* namei.c */
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index f5e86141ec54..a484412fc782 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -55,6 +55,20 @@ struct file_operations ext2_file_operations = {
        .sendfile       = generic_file_sendfile,
 };
+#ifdef CONFIG_EXT2_FS_XIP
+struct file_operations ext2_xip_file_operations = {
+        .llseek         = generic_file_llseek,
+        .read           = xip_file_read,
+        .write          = xip_file_write,
+        .ioctl          = ext2_ioctl,
+        .mmap           = xip_file_mmap,
+        .open           = generic_file_open,
+        .release        = ext2_release_file,
+        .fsync          = ext2_sync_file,
+        .sendfile       = xip_file_sendfile,
+};
+#endif
 struct inode_operations ext2_file_inode_operations = {
        .truncate       = ext2_truncate,
 #ifdef CONFIG_EXT2_FS_XATTR
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a50d9db4b6e4..53dceb0c6593 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -33,6 +33,7 @@
 #include <linux/mpage.h>
 #include "ext2.h"
 #include "acl.h"
+#include "xip.h"
 MODULE_AUTHOR("Remy Card and others");
 MODULE_DESCRIPTION("Second Extended Filesystem");
@@ -594,6 +595,16 @@ out:
        if (err)
                goto cleanup;
+        if (ext2_use_xip(inode->i_sb)) {
+                /*
+                 * we need to clear the block
+                 */
+                err = ext2_clear_xip_target (inode,
+                        le32_to_cpu(chain[depth-1].key));
+                if (err)
+                        goto cleanup;
+        }
        if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
                goto changed;
@@ -691,6 +702,11 @@ struct address_space_operations ext2_aops = {
        .writepages             = ext2_writepages,
 };
+/*
+ * Address space operations installed on regular files when the
+ * filesystem uses execute in place (see ext2_read_inode() and
+ * ext2_create()).  Only bmap and get_xip_page are set here.
+ * Without CONFIG_EXT2_FS_XIP, xip.h defines ext2_get_xip_page as NULL,
+ * so this table still builds.
+ */
+struct address_space_operations ext2_aops_xip = {
+        .bmap                   = ext2_bmap,
+        .get_xip_page           = ext2_get_xip_page,
+};
 struct address_space_operations ext2_nobh_aops = {
        .readpage               = ext2_readpage,
        .readpages              = ext2_readpages,
@@ -910,7 +926,9 @@ void ext2_truncate (struct inode * inode)
        iblock = (inode->i_size + blocksize-1)
                                        >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
-        if (test_opt(inode->i_sb, NOBH))
+        if (mapping_is_xip(inode->i_mapping))
+                xip_truncate_page(inode->i_mapping, inode->i_size);
+        else if (test_opt(inode->i_sb, NOBH))
                nobh_truncate_page(inode->i_mapping, inode->i_size);
        else
                block_truncate_page(inode->i_mapping,
@@ -1110,11 +1128,16 @@ void ext2_read_inode (struct inode * inode)
        if (S_ISREG(inode->i_mode)) {
                inode->i_op = &ext2_file_inode_operations;
-                inode->i_fop = &ext2_file_operations;
+                if (ext2_use_xip(inode->i_sb)) {
-                if (test_opt(inode->i_sb, NOBH))
+                        inode->i_mapping->a_ops = &ext2_aops_xip;
+                        inode->i_fop = &ext2_xip_file_operations;
+                } else if (test_opt(inode->i_sb, NOBH)) {
                        inode->i_mapping->a_ops = &ext2_nobh_aops;
-                else
+                        inode->i_fop = &ext2_file_operations;
+                } else {
                        inode->i_mapping->a_ops = &ext2_aops;
+                        inode->i_fop = &ext2_file_operations;
+                }
        } else if (S_ISDIR(inode->i_mode)) {
                inode->i_op = &ext2_dir_inode_operations;
                inode->i_fop = &ext2_dir_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3176b3d3ffa8..c5513953c825 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -34,6 +34,7 @@
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "xip.h"
 /*
 * Couple of helper functions - make the code slightly cleaner.
@@ -127,11 +128,16 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
        int err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
                inode->i_op = &ext2_file_inode_operations;
-                inode->i_fop = &ext2_file_operations;
+                if (ext2_use_xip(inode->i_sb)) {
-                if (test_opt(inode->i_sb, NOBH))
+                        inode->i_mapping->a_ops = &ext2_aops_xip;
+                        inode->i_fop = &ext2_xip_file_operations;
+                } else if (test_opt(inode->i_sb, NOBH)) {
                        inode->i_mapping->a_ops = &ext2_nobh_aops;
-                else
+                        inode->i_fop = &ext2_file_operations;
+                } else {
                        inode->i_mapping->a_ops = &ext2_aops;
+                        inode->i_fop = &ext2_file_operations;
+                }
                mark_inode_dirty(inode);
                err = ext2_add_nondir(dentry, inode);
        }
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 661c3d98d946..876e391f2871 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
 #include "ext2.h"
 #include "xattr.h"
 #include "acl.h"
+#include "xip.h"
 static void ext2_sync_super(struct super_block *sb,
                            struct ext2_super_block *es);
@@ -257,7 +258,7 @@ enum {
        Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
        Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
        Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
-        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip,
        Opt_ignore, Opt_err,
 };
@@ -286,6 +287,7 @@ static match_table_t tokens = {
        {Opt_nouser_xattr, "nouser_xattr"},
        {Opt_acl, "acl"},
        {Opt_noacl, "noacl"},
+        {Opt_xip, "xip"},
        {Opt_ignore, "grpquota"},
        {Opt_ignore, "noquota"},
        {Opt_ignore, "quota"},
@@ -397,6 +399,13 @@ static int parse_options (char * options,
                        printk("EXT2 (no)acl options not supported\n");
                        break;
 #endif
+                case Opt_xip:
+#ifdef CONFIG_EXT2_FS_XIP
+                        set_opt (sbi->s_mount_opt, XIP);
+#else
+                        printk("EXT2 xip option not supported\n");
+#endif
+                        break;
                case Opt_ignore:
                        break;
                default:
@@ -640,6 +649,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
                ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
                 MS_POSIXACL : 0);
+        ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
+                                    EXT2_MOUNT_XIP if not */
        if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
            (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
             EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
@@ -668,6 +680,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
        blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
+        if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) ||
+                                  (sb->s_blocksize != blocksize))) {
+                if (!silent)
+                        printk("XIP: Unsupported blocksize\n");
+                goto failed_mount;
+        }
        /* If the blocksize doesn't match, re-read the thing.. */
        if (sb->s_blocksize != blocksize) {
                brelse(bh);
@@ -916,6 +935,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 {
        struct ext2_sb_info * sbi = EXT2_SB(sb);
        struct ext2_super_block * es;
+        unsigned long old_mount_opt = sbi->s_mount_opt;
        /*
         * Allow the "check" option to be passed as a remount option.
@@ -927,6 +947,11 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
                ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
        es = sbi->s_es;
+        if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
+            (old_mount_opt & EXT2_MOUNT_XIP)) &&
+            invalidate_inodes(sb))
+                ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
+                             "xip remain in cache (no functional problem)");
        if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
                return 0;
        if (*flags & MS_RDONLY) {
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
new file mode 100644
index 000000000000..d44431d1a338
--- /dev/null
+++ b/fs/ext2/xip.c
@@ -0,0 +1,80 @@
+/*
+ *  linux/fs/ext2/xip.c
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte (cotte@de.ibm.com)
+ */
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/buffer_head.h>
+#include <linux/ext2_fs_sb.h>
+#include <linux/ext2_fs.h>
+#include "ext2.h"
+#include "xip.h"
+/*
+ * Call the direct_access method of the block device behind the inode's
+ * superblock, passing @sector and @data through unchanged, and return
+ * whatever that method returns.
+ *
+ * Callers in this file treat a zero return as success and then use
+ * *@data as a kernel virtual address (clear_page(), virt_to_page()).
+ *
+ * The BUG_ON fires if the device has no direct_access method;
+ * ext2_xip_verify_sb() clears EXT2_MOUNT_XIP for such devices.
+ * s_bdev, bd_disk and fops are dereferenced without NULL checks here.
+ */
+static inline int
+__inode_direct_access(struct inode *inode, sector_t sector, unsigned long *data) {
+        BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access);
+        return inode->i_sb->s_bdev->bd_disk->fops
+                ->direct_access(inode->i_sb->s_bdev,sector,data);
+}
+/*
+ * Zero the memory page that backs filesystem block @block.
+ *
+ * The block number is turned into a 512-byte sector number by
+ * multiplying with PAGE_SIZE/512, i.e. one block is assumed to be one
+ * page; ext2_fill_super() refuses XIP mounts with blocksize != PAGE_SIZE.
+ *
+ * Returns 0 on success, or the nonzero result of the device's
+ * direct_access method, in which case nothing is cleared.
+ *
+ * NOTE(review): @block is a plain int while the result is stored in a
+ * sector_t - confirm the multiplication cannot overflow or sign-extend
+ * for large block numbers.
+ */
+int
+ext2_clear_xip_target(struct inode *inode, int block) {
+        sector_t sector = block*(PAGE_SIZE/512);
+        unsigned long data;
+        int rc;
+        rc = __inode_direct_access(inode, sector, &data);
+        if (rc)
+                return rc;
+        /* data now holds the address handed back by direct_access */
+        clear_page((void*)data);
+        return 0;
+}
+/*
+ * Check whether the xip mount option can be honoured for @sb.
+ *
+ * Does nothing unless EXT2_MOUNT_XIP is set.  If it is set and the
+ * superblock has no block device, no gendisk, no fops, or the fops lack
+ * a direct_access method, the EXT2_MOUNT_XIP bit is cleared and a
+ * warning is logged.  Nothing is returned; callers test the result
+ * afterwards through ext2_use_xip().
+ */
+void ext2_xip_verify_sb(struct super_block *sb)
+{
+        struct ext2_sb_info *sbi = EXT2_SB(sb);
+        if ((sbi->s_mount_opt & EXT2_MOUNT_XIP)) {
+                if ((sb->s_bdev == NULL) ||
+                        sb->s_bdev->bd_disk == NULL ||
+                        sb->s_bdev->bd_disk->fops == NULL ||
+                        sb->s_bdev->bd_disk->fops->direct_access == NULL) {
+                        sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
+                        ext2_warning(sb, __FUNCTION__,
+                                "ignoring xip option - not supported by bdev");
+                }
+        }
+}
+/*
+ * get_xip_page address space operation for ext2 (see ext2_aops_xip).
+ *
+ * @mapping: mapping whose host inode is looked up
+ * @blockno: offset in 512-byte units; it is divided by PAGE_SIZE/512
+ *           before being passed to ext2_get_block() as the block index
+ *           (presumably a file-relative sector - confirm against callers)
+ * @create:  passed through to ext2_get_block()
+ *
+ * Returns the struct page for the address the device's direct_access
+ * method hands back, after marking that page uptodate.  On failure an
+ * ERR_PTR is returned: the error from ext2_get_block() or from
+ * direct_access, or -ENODATA when no block is mapped.
+ */
+struct page*
+ext2_get_xip_page(struct address_space *mapping, sector_t blockno,
+                   int create)
+{
+        int rc;
+        unsigned long data;
+        struct buffer_head tmp;
+        /*
+         * Only b_state and b_blocknr of the on-stack buffer_head are set.
+         * NOTE(review): confirm ext2_get_block() reads no other field.
+         */
+        tmp.b_state = 0;
+        tmp.b_blocknr = 0;
+        rc = ext2_get_block(mapping->host, blockno/(PAGE_SIZE/512) , &tmp,
+                                create);
+        if (rc)
+                return ERR_PTR(rc);
+        /* b_blocknr still 0 means ext2_get_block() mapped nothing */
+        if (tmp.b_blocknr == 0) {
+                /* SPARSE block */
+                /* a hole with create set is treated as a bug */
+                BUG_ON(create);
+                return ERR_PTR(-ENODATA);
+        }
+        /* convert the filesystem block back to a 512-byte sector number */
+        rc = __inode_direct_access
+                (mapping->host,tmp.b_blocknr*(PAGE_SIZE/512) ,&data);
+        if (rc)
+                return ERR_PTR(rc);
+        SetPageUptodate(virt_to_page(data));
+        return virt_to_page(data);
+}
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h
new file mode 100644
index 000000000000..aa85331d6c56
--- /dev/null
+++ b/fs/ext2/xip.h
@@ -0,0 +1,25 @@
+/*
+ *  linux/fs/ext2/xip.h
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte (cotte@de.ibm.com)
+ */
+/*
+ * XIP interface used by the rest of ext2.  With CONFIG_EXT2_FS_XIP the
+ * real functions are declared; without it every name below becomes a
+ * constant or empty stub so callers need no #ifdef of their own.
+ */
+#ifdef CONFIG_EXT2_FS_XIP
+extern void ext2_xip_verify_sb (struct super_block *);
+extern int ext2_clear_xip_target (struct inode *, int);
+/*
+ * Nonzero when the superblock has EXT2_MOUNT_XIP set.  The value is the
+ * masked bit itself, not normalised to 0/1.
+ */
+static inline int ext2_use_xip (struct super_block *sb)
+{
+        struct ext2_sb_info *sbi = EXT2_SB(sb);
+        return (sbi->s_mount_opt & EXT2_MOUNT_XIP);
+}
+struct page* ext2_get_xip_page (struct address_space *, sector_t, int);
+/*
+ * True when the mapping's a_ops provide get_xip_page; hinted as unlikely.
+ * Note that the macro argument is used without parentheses.
+ */
+#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page)
+#else
+/* Stubs for builds without XIP: tests evaluate to 0, calls do nothing. */
+#define mapping_is_xip(map)                     0
+#define ext2_xip_verify_sb(sb)                  do { } while (0)
+#define ext2_use_xip(sb)                        0
+#define ext2_clear_xip_target(inode, chain)     0
+#define ext2_get_xip_page                       NULL
+#endif
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 638c13a26c03..3ac38266fc9e 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -393,7 +393,8 @@ ext3_acl_chmod(struct inode *inode)
                int retries = 0;
        retry:
-                handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+                handle = ext3_journal_start(inode,
+                                EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
                if (IS_ERR(handle)) {
                        error = PTR_ERR(handle);
                        ext3_std_error(inode->i_sb, error);
@@ -417,12 +418,12 @@ static size_t
 ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
                           const char *name, size_t name_len)
 {
-        const size_t size = sizeof(XATTR_NAME_ACL_ACCESS);
+        const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
        if (!test_opt(inode->i_sb, POSIX_ACL))
                return 0;
        if (list && size <= list_len)
-                memcpy(list, XATTR_NAME_ACL_ACCESS, size);
+                memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
        return size;
 }
@@ -430,12 +431,12 @@ static size_t
 ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
                            const char *name, size_t name_len)
 {
-        const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT);
+        const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
        if (!test_opt(inode->i_sb, POSIX_ACL))
                return 0;
        if (list && size <= list_len)
-                memcpy(list, XATTR_NAME_ACL_DEFAULT, size);
+                memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
        return size;
 }
@@ -503,7 +504,7 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
                acl = NULL;
 retry:
-        handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+        handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        error = ext3_set_acl(handle, inode, type, acl);
@@ -535,14 +536,14 @@ ext3_xattr_set_acl_default(struct inode *inode, const char *name,
 }
 struct xattr_handler ext3_xattr_acl_access_handler = {
-        .prefix = XATTR_NAME_ACL_ACCESS,
+        .prefix = POSIX_ACL_XATTR_ACCESS,
        .list   = ext3_xattr_list_acl_access,
        .get    = ext3_xattr_get_acl_access,
        .set    = ext3_xattr_set_acl_access,
 };
 struct xattr_handler ext3_xattr_acl_default_handler = {
-        .prefix = XATTR_NAME_ACL_DEFAULT,
+        .prefix = POSIX_ACL_XATTR_DEFAULT,
        .list   = ext3_xattr_list_acl_default,
        .get    = ext3_xattr_get_acl_default,
        .set    = ext3_xattr_set_acl_default,
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 98af0c0d0ba9..92d50b53a933 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -4,7 +4,7 @@
  (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
 */
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 #define EXT3_ACL_VERSION        0x0001
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0d5fa73b18dc..0b2db4f618cb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -128,7 +128,7 @@ static unsigned long blocks_for_truncate(struct inode *inode)
        if (needed > EXT3_MAX_TRANS_DATA) 
                needed = EXT3_MAX_TRANS_DATA;
-        return EXT3_DATA_TRANS_BLOCKS + needed;
+        return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
 }
 /* 
@@ -2763,7 +2763,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
                /* (user+group)*(old+new) structure, inode write (sb,
                 * inode block, ? - but truncate inode update has it) */
-                handle = ext3_journal_start(inode, 4*EXT3_QUOTA_INIT_BLOCKS+3);
+                handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
+                                        EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
                if (IS_ERR(handle)) {
                        error = PTR_ERR(handle);
                        goto err_out;
@@ -2861,7 +2862,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
 #ifdef CONFIG_QUOTA
        /* We know that structure was already allocated during DQUOT_INIT so
         * we will be updating only the data blocks + inodes */
-        ret += 2*EXT3_QUOTA_TRANS_BLOCKS;
+        ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
 #endif
        return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 79742d824a0a..50378d8ff84b 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -932,8 +932,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
        struct inode *dir = dentry->d_parent->d_inode;
        sb = dir->i_sb;
-        if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
+        /* NFS may look up ".." - look at dx_root directory block */
-                return NULL;
+        if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
+                if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
+                        return NULL;
+        } else {
+                frame = frames;
+                frame->bh = NULL;                       /* for dx_release() */
+                frame->at = (struct dx_entry *)frames;  /* hack for zero entry*/
+                dx_set_block(frame->at, 0);             /* dx_root block is 0 */
+        }
        hash = hinfo.hash;
        do {
                block = dx_get_block(frame->at);
@@ -1637,9 +1645,9 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
        int err, retries = 0;
 retry:
-        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-                                        2*EXT3_QUOTA_INIT_BLOCKS);
+                                        2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@ -1671,9 +1679,9 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
                return -EINVAL;
 retry:
-        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-                                        2*EXT3_QUOTA_INIT_BLOCKS);
+                                        2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@ -1707,9 +1715,9 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
                return -EMLINK;
 retry:
-        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
-                                        2*EXT3_QUOTA_INIT_BLOCKS);
+                                        2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@ -1998,7 +2006,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
        /* Initialize quotas before so that eventual writes go in
         * separate transaction */
        DQUOT_INIT(dentry->d_inode);
-        handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+        handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@ -2057,7 +2065,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
        /* Initialize quotas before so that eventual writes go
         * in separate transaction */
        DQUOT_INIT(dentry->d_inode);
-        handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+        handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@ -2112,9 +2120,9 @@ static int ext3_symlink (struct inode * dir,
                return -ENAMETOOLONG;
 retry:
-        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
-                                        2*EXT3_QUOTA_INIT_BLOCKS);
+                                        2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@ -2166,7 +2174,7 @@ static int ext3_link (struct dentry * old_dentry,
                return -EMLINK;
 retry:
-        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
+        handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
                                        EXT3_INDEX_EXTRA_TRANS_BLOCKS);
        if (IS_ERR(handle))
                return PTR_ERR(handle);
@@ -2208,7 +2216,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
         * in separate transaction */
        if (new_dentry->d_inode)
                DQUOT_INIT(new_dentry->d_inode);
-        handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS +
+        handle = ext3_journal_start(old_dir, 2 *
+                                        EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
                                        EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
        if (IS_ERR(handle))
                return PTR_ERR(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 981ccb233ef5..b4b3e8a39131 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -589,7 +589,7 @@ enum {
        Opt_commit, Opt_journal_update, Opt_journal_inum,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
-        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
 };
@@ -634,10 +634,10 @@ static match_table_t tokens = {
        {Opt_grpjquota, "grpjquota=%s"},
        {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
        {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
-        {Opt_ignore, "grpquota"},
+        {Opt_quota, "grpquota"},
-        {Opt_ignore, "noquota"},
+        {Opt_noquota, "noquota"},
-        {Opt_ignore, "quota"},
+        {Opt_quota, "quota"},
-        {Opt_ignore, "usrquota"},
+        {Opt_quota, "usrquota"},
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
@@ -876,6 +876,7 @@ set_qf_name:
                                sbi->s_qf_names[qtype] = NULL;
                                return 0;
                        }
+                        set_opt(sbi->s_mount_opt, QUOTA);
                        break;
                case Opt_offusrjquota:
                        qtype = USRQUOTA;
@@ -898,6 +899,17 @@ clear_qf_name:
                case Opt_jqfmt_vfsv0:
                        sbi->s_jquota_fmt = QFMT_VFS_V0;
                        break;
+                case Opt_quota:
+                        set_opt(sbi->s_mount_opt, QUOTA);
+                        break;
+                case Opt_noquota:
+                        if (sb_any_quota_enabled(sb)) {
+                                printk(KERN_ERR "EXT3-fs: Cannot change quota "
+                                        "options when quota turned on.\n");
+                                return 0;
+                        }
+                        clear_opt(sbi->s_mount_opt, QUOTA);
+                        break;
 #else
                case Opt_usrjquota:
                case Opt_grpjquota:
@@ -909,6 +921,9 @@ clear_qf_name:
                                "EXT3-fs: journalled quota options not "
                                "supported.\n");
                        break;
+                case Opt_quota:
+                case Opt_noquota:
+                        break;
 #endif
                case Opt_abort:
                        set_opt(sbi->s_mount_opt, ABORT);
@@ -2238,7 +2253,7 @@ static int ext3_dquot_initialize(struct inode *inode, int type)
        int ret, err;
        /* We may create quota structure so we need to reserve enough blocks */
-        handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS);
+        handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_initialize(inode, type);
@@ -2254,7 +2269,7 @@ static int ext3_dquot_drop(struct inode *inode)
        int ret, err;
        /* We may delete quota structure so we need to reserve enough blocks */
-        handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS);
+        handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_drop(inode);
@@ -2272,7 +2287,7 @@ static int ext3_write_dquot(struct dquot *dquot)
        inode = dquot_to_inode(dquot);
        handle = ext3_journal_start(inode,
-                                        EXT3_QUOTA_TRANS_BLOCKS);
+                                        EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_commit(dquot);
@@ -2288,7 +2303,7 @@ static int ext3_acquire_dquot(struct dquot *dquot)
        handle_t *handle;
        handle = ext3_journal_start(dquot_to_inode(dquot),
-                                        EXT3_QUOTA_INIT_BLOCKS);
+                                        EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_acquire(dquot);
@@ -2304,7 +2319,7 @@ static int ext3_release_dquot(struct dquot *dquot)
        handle_t *handle;
        handle = ext3_journal_start(dquot_to_inode(dquot),
-                                        EXT3_QUOTA_INIT_BLOCKS);
+                                        EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
        if (IS_ERR(handle))
                return PTR_ERR(handle);
        ret = dquot_release(dquot);
@@ -2348,22 +2363,8 @@ static int ext3_write_info(struct super_block *sb, int type)
 */
 static int ext3_quota_on_mount(struct super_block *sb, int type)
 {
-        int err;
+        return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
-        struct dentry *dentry;
+                        EXT3_SB(sb)->s_jquota_fmt, type);
-        struct qstr name = { .name = EXT3_SB(sb)->s_qf_names[type],
-                             .hash = 0,
-                             .len = strlen(EXT3_SB(sb)->s_qf_names[type])};
-        dentry = lookup_hash(&name, sb->s_root);
-        if (IS_ERR(dentry))
-                return PTR_ERR(dentry);
-        err = vfs_quota_on_mount(type, EXT3_SB(sb)->s_jquota_fmt, dentry);
-        /* Now invalidate and put the dentry - quota got its own reference
-         * to inode and dentry has at least wrong hash so we had better
-         * throw it away */
-        d_invalidate(dentry);
-        dput(dentry);
-        return err;
 }
 /*
@@ -2375,6 +2376,8 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
        int err;
        struct nameidata nd;
+        if (!test_opt(sb, QUOTA))
+                return -EINVAL;
        /* Not journalling quota? */
        if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
            !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 4cbc6d0212d3..3f9dfa643b19 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1044,7 +1044,7 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name,
        int error, retries = 0;
 retry:
-        handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
+        handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
        if (IS_ERR(handle)) {
                error = PTR_ERR(handle);
        } else {
diff --git a/fs/file_table.c b/fs/file_table.c
index 03d83cb686b1..fa7849fae134 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -63,42 +63,45 @@ static inline void file_free(struct file *f)
 */
 struct file *get_empty_filp(void)
 {
-static int old_max;
+        static int old_max;
        struct file * f;
        /*
         * Privileged users can go above max_files
         */
-        if (files_stat.nr_files < files_stat.max_files ||
+        if (files_stat.nr_files >= files_stat.max_files &&
-                                capable(CAP_SYS_ADMIN)) {
+                                !capable(CAP_SYS_ADMIN))
-                f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
+                goto over;
-                if (f) {
-                        memset(f, 0, sizeof(*f));
+        f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
-                        if (security_file_alloc(f)) {
+        if (f == NULL)
-                                file_free(f);
+                goto fail;
-                                goto fail;
-                        }
+        memset(f, 0, sizeof(*f));
-                        eventpoll_init_file(f);
+        if (security_file_alloc(f))
-                        atomic_set(&f->f_count, 1);
+                goto fail_sec;
-                        f->f_uid = current->fsuid;
-                        f->f_gid = current->fsgid;
+        eventpoll_init_file(f);
-                        rwlock_init(&f->f_owner.lock);
+        atomic_set(&f->f_count, 1);
-                        /* f->f_version: 0 */
+        f->f_uid = current->fsuid;
-                        INIT_LIST_HEAD(&f->f_list);
+        f->f_gid = current->fsgid;
-                        f->f_maxcount = INT_MAX;
+        rwlock_init(&f->f_owner.lock);
-                        return f;
+        /* f->f_version: 0 */
-                }
+        INIT_LIST_HEAD(&f->f_list);
-        }
+        f->f_maxcount = INT_MAX;
+        return f;
+over:
        /* Ran out of filps - report that */
-        if (files_stat.max_files >= old_max) {
+        if (files_stat.nr_files > old_max) {
                printk(KERN_INFO "VFS: file-max limit %d reached\n",
                                        files_stat.max_files);
-                old_max = files_stat.max_files;
+                old_max = files_stat.nr_files;
-        } else {
-                /* Big problems... */
-                printk(KERN_WARNING "VFS: filp allocation failed\n");
        }
+        goto fail;
+fail_sec:
+        file_free(f);
 fail:
        return NULL;
 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8e050fa58218..e94ab398b717 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -485,32 +485,6 @@ static void set_sb_syncing(int val)
        spin_unlock(&sb_lock);
 }
-/*
- * Find a superblock with inodes that need to be synced
- */
-static struct super_block *get_super_to_sync(void)
-{
-        struct super_block *sb;
-restart:
-        spin_lock(&sb_lock);
-        sb = sb_entry(super_blocks.prev);
-        for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
-                if (sb->s_syncing)
-                        continue;
-                sb->s_syncing = 1;
-                sb->s_count++;
-                spin_unlock(&sb_lock);
-                down_read(&sb->s_umount);
-                if (!sb->s_root) {
-                        drop_super(sb);
-                        goto restart;
-                }
-                return sb;
-        }
-        spin_unlock(&sb_lock);
-        return NULL;
-}
 /**
 * sync_inodes - writes all inodes to disk
 * @wait: wait for completion
@@ -530,23 +504,39 @@ restart:
 * outstanding dirty inodes, the writeback goes block-at-a-time within the
 * filesystem's write_inode().  This is extremely slow.
 */
-void sync_inodes(int wait)
+static void __sync_inodes(int wait)
 {
        struct super_block *sb;
-        set_sb_syncing(0);
+        spin_lock(&sb_lock);
-        while ((sb = get_super_to_sync()) != NULL) {
+restart:
-                sync_inodes_sb(sb, 0);
+        list_for_each_entry(sb, &super_blocks, s_list) {
-                sync_blockdev(sb->s_bdev);
+                if (sb->s_syncing)
-                drop_super(sb);
+                        continue;
+                sb->s_syncing = 1;
+                sb->s_count++;
+                spin_unlock(&sb_lock);
+                down_read(&sb->s_umount);
+                if (sb->s_root) {
+                        sync_inodes_sb(sb, wait);
+                        sync_blockdev(sb->s_bdev);
+                }
+                up_read(&sb->s_umount);
+                spin_lock(&sb_lock);
+                if (__put_super_and_need_restart(sb))
+                        goto restart;
        }
+        spin_unlock(&sb_lock);
+}
+void sync_inodes(int wait)
+{
+        set_sb_syncing(0);
+        __sync_inodes(0);
        if (wait) {
                set_sb_syncing(0);
-                while ((sb = get_super_to_sync()) != NULL) {
+                __sync_inodes(1);
-                        sync_inodes_sb(sb, 1);
-                        sync_blockdev(sb->s_bdev);
-                        drop_super(sb);
-                }
        }
 }
diff --git a/fs/inode.c b/fs/inode.c
index 801fe7f36280..1f9a3a2b89bc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -500,7 +500,7 @@ repeat:
                        continue;
                if (!test(inode, data))
                        continue;
-                if (inode->i_state & (I_FREEING|I_CLEAR)) {
+                if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
                        __wait_on_freeing_inode(inode);
                        goto repeat;
                }
@@ -525,7 +525,7 @@ repeat:
                        continue;
                if (inode->i_sb != sb)
                        continue;
-                if (inode->i_state & (I_FREEING|I_CLEAR)) {
+                if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
                        __wait_on_freeing_inode(inode);
                        goto repeat;
                }
@@ -727,7 +727,7 @@ EXPORT_SYMBOL(iunique);
 struct inode *igrab(struct inode *inode)
 {
        spin_lock(&inode_lock);
-        if (!(inode->i_state & I_FREEING))
+        if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
                __iget(inode);
        else
                /*
@@ -1024,17 +1024,21 @@ static void generic_forget_inode(struct inode *inode)
                if (!(inode->i_state & (I_DIRTY|I_LOCK)))
                        list_move(&inode->i_list, &inode_unused);
                inodes_stat.nr_unused++;
-                spin_unlock(&inode_lock);
+                if (!sb || (sb->s_flags & MS_ACTIVE)) {
-                if (!sb || (sb->s_flags & MS_ACTIVE))
+                        spin_unlock(&inode_lock);
                        return;
+                }
+                inode->i_state |= I_WILL_FREE;
+                spin_unlock(&inode_lock);
                write_inode_now(inode, 1);
                spin_lock(&inode_lock);
+                inode->i_state &= ~I_WILL_FREE;
                inodes_stat.nr_unused--;
                hlist_del_init(&inode->i_hash);
        }
        list_del_init(&inode->i_list);
        list_del_init(&inode->i_sb_list);
-        inode->i_state|=I_FREEING;
+        inode->i_state |= I_FREEING;
        inodes_stat.nr_inodes--;
        spin_unlock(&inode_lock);
        if (inode->i_data.nrpages)
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 30a2bf9eeda5..e892dab40c26 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -21,6 +21,7 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/quotaops.h>
+#include <linux/posix_acl_xattr.h>
 #include "jfs_incore.h"
 #include "jfs_xattr.h"
 #include "jfs_acl.h"
@@ -36,11 +37,11 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
        switch(type) {
                case ACL_TYPE_ACCESS:
-                        ea_name = XATTR_NAME_ACL_ACCESS;
+                        ea_name = POSIX_ACL_XATTR_ACCESS;
                        p_acl = &ji->i_acl;
                        break;
                case ACL_TYPE_DEFAULT:
-                        ea_name = XATTR_NAME_ACL_DEFAULT;
+                        ea_name = POSIX_ACL_XATTR_DEFAULT;
                        p_acl = &ji->i_default_acl;
                        break;
                default:
@@ -88,11 +89,11 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
        switch(type) {
                case ACL_TYPE_ACCESS:
-                        ea_name = XATTR_NAME_ACL_ACCESS;
+                        ea_name = POSIX_ACL_XATTR_ACCESS;
                        p_acl = &ji->i_acl;
                        break;
                case ACL_TYPE_DEFAULT:
-                        ea_name = XATTR_NAME_ACL_DEFAULT;
+                        ea_name = POSIX_ACL_XATTR_DEFAULT;
                        p_acl = &ji->i_default_acl;
                        if (!S_ISDIR(inode->i_mode))
                                return acl ? -EACCES : 0;
@@ -101,7 +102,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                        return -EINVAL;
        }
        if (acl) {
-                size = xattr_acl_size(acl->a_count);
+                size = posix_acl_xattr_size(acl->a_count);
                value = kmalloc(size, GFP_KERNEL);
                if (!value)
                        return -ENOMEM;
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index d2ae430adecf..a3acd3eec059 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,8 +20,6 @@
 #ifdef CONFIG_JFS_POSIX_ACL
-#include <linux/xattr_acl.h>
 int jfs_permission(struct inode *, int, struct nameidata *);
 int jfs_init_acl(struct inode *, struct inode *);
 int jfs_setattr(struct dentry *, struct iattr *);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 810a3653d8b3..ee32211288ce 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -24,6 +24,7 @@
 #include <linux/completion.h>
 #include <linux/vfs.h>
 #include <linux/moduleparam.h>
+#include <linux/posix_acl.h>
 #include <asm/uaccess.h>
 #include "jfs_incore.h"
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 6016373701a3..ee438d429d45 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -19,6 +19,7 @@
 #include <linux/fs.h>
 #include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/quotaops.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
@@ -718,9 +719,9 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
                return -EPERM;
        /*
-         * XATTR_NAME_ACL_ACCESS is tied to i_mode
+         * POSIX_ACL_XATTR_ACCESS is tied to i_mode
         */
-        if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) {
+        if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
                acl = posix_acl_from_xattr(value, value_len);
                if (IS_ERR(acl)) {
                        rc = PTR_ERR(acl);
@@ -750,7 +751,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
                JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
                return 0;
-        } else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) {
+        } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
                acl = posix_acl_from_xattr(value, value_len);
                if (IS_ERR(acl)) {
                        rc = PTR_ERR(acl);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b82e470912e8..6e242556b903 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -191,7 +191,9 @@ lockd(struct svc_rqst *rqstp)
                printk(KERN_DEBUG
                        "lockd: new process, skipping host shutdown\n");
        wake_up(&lockd_exit);
-                
+        flush_signals(current);
        /* Exit the RPC thread */
        svc_exit_thread(rqstp);
diff --git a/fs/namei.c b/fs/namei.c
index a7f7f44119b3..fa8df81ce8ca 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1577,19 +1577,35 @@ do_link:
 *
 * Simple function to lookup and return a dentry and create it
 * if it doesn't exist.  Is SMP-safe.
+ *
+ * Returns with nd->dentry->d_inode->i_sem locked.
 */
 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
 {
-        struct dentry *dentry;
+        struct dentry *dentry = ERR_PTR(-EEXIST);
        down(&nd->dentry->d_inode->i_sem);
-        dentry = ERR_PTR(-EEXIST);
+        /*
+         * Yucky last component or no last component at all?
+         * (foo/., foo/.., /////)
+         */
        if (nd->last_type != LAST_NORM)
                goto fail;
        nd->flags &= ~LOOKUP_PARENT;
+        /*
+         * Do the final lookup.
+         */
        dentry = lookup_hash(&nd->last, nd->dentry);
        if (IS_ERR(dentry))
                goto fail;
+        /*
+         * Special case - lookup gave negative, but... we had foo/bar/
+         * From the vfs_mknod() POV we just have a negative dentry -
+         * all is fine. Let's be bastards - you had / on the end, you've
+         * been asking for (non-existent) directory. -ENOENT for you.
+         */
        if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
                goto enoent;
        return dentry;
diff --git a/fs/namespace.c b/fs/namespace.c
index 3b93e5d750eb..208c079e9fdb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -337,7 +337,7 @@ int may_umount(struct vfsmount *mnt)
 EXPORT_SYMBOL(may_umount);
-void umount_tree(struct vfsmount *mnt)
+static void umount_tree(struct vfsmount *mnt)
 {
        struct vfsmount *p;
        LIST_HEAD(kill);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d6a30c844de3..6537f2c4ae44 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -751,11 +751,6 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
        retval = -EFAULT;
        if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
                goto out;
-        if (file->f_error) {
-                retval = file->f_error;
-                file->f_error = 0;
-                goto out;
-        }
        retval = -EFBIG;
        if (limit != RLIM_INFINITY) {
                if (pos >= limit) {
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9f043f44c92f..ce341dc76d5e 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -10,5 +10,5 @@ nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3)  += nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
 nfsd-$(CONFIG_NFSD_V4)  += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
-                           nfs4acl.o nfs4callback.o
+                           nfs4acl.o nfs4callback.o nfs4recover.o
 nfsd-objs               := $(nfsd-y)
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 11ebf6c4aa54..4a2105552ac4 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -125,7 +125,7 @@ static short ace2type(struct nfs4_ace *);
 static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int);
 static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int);
 int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
-int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
+static int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
 struct nfs4_acl *
 nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
@@ -775,7 +775,7 @@ out_err:
        return pacl;
 }
-int
+static int
 nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
 {
        struct list_head *h, *n;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 634465e9cfc6..583c0710e45e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -54,7 +54,6 @@
 /* declarations */
 static void nfs4_cb_null(struct rpc_task *task);
-extern spinlock_t recall_lock;
 /* Index of predefined Linux callback client operations */
@@ -329,12 +328,12 @@ out:
        .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2,  \
 }
-struct rpc_procinfo     nfs4_cb_procedures[] = {
+static struct rpc_procinfo     nfs4_cb_procedures[] = {
    PROC(CB_NULL,      NULL,     enc_cb_null,     dec_cb_null),
    PROC(CB_RECALL,    COMPOUND,   enc_cb_recall,      dec_cb_recall),
 };
-struct rpc_version              nfs_cb_version4 = {
+static struct rpc_version       nfs_cb_version4 = {
        .number                 = 1,
        .nrprocs                = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
        .procs                  = nfs4_cb_procedures
@@ -348,7 +347,7 @@ static struct rpc_version *	nfs_cb_version[] = {
 /*
 * Use the SETCLIENTID credential
 */
-struct rpc_cred *
+static struct rpc_cred *
 nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
 {
        struct auth_cred acred;
@@ -387,9 +386,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
        char                    hostname[32];
        int status;
-        dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n",
+        if (atomic_read(&cb->cb_set))
-                        cb->cb_parsed, atomic_read(&cb->cb_set));
-        if (!cb->cb_parsed || atomic_read(&cb->cb_set))
                return;
        /* Initialize address */
@@ -427,7 +424,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
         * XXX AUTH_UNIX only - need AUTH_GSS....
         */
        sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
-        clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
+        clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
        if (IS_ERR(clnt)) {
                dprintk("NFSD: couldn't create callback client\n");
                goto out_err;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 4ba540841cf6..5605a26efc57 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -104,7 +104,7 @@ ent_update(struct ent *new, struct ent *itm)
        ent_init(new, itm);
 }
-void
+static void
 ent_put(struct cache_head *ch, struct cache_detail *cd)
 {
        if (cache_put(ch, cd)) {
@@ -186,7 +186,7 @@ warn_no_idmapd(struct cache_detail *detail)
 static int         idtoname_parse(struct cache_detail *, char *, int);
 static struct ent *idtoname_lookup(struct ent *, int);
-struct cache_detail idtoname_cache = {
+static struct cache_detail idtoname_cache = {
        .hash_size      = ENT_HASHMAX,
        .hash_table     = idtoname_table,
        .name           = "nfs4.idtoname",
@@ -277,7 +277,7 @@ nametoid_hash(struct ent *ent)
        return hash_str(ent->name, ENT_HASHBITS);
 }
-void
+static void
 nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
    int *blen)
 {
@@ -317,9 +317,9 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
 }
 static struct ent *nametoid_lookup(struct ent *, int);
-int                nametoid_parse(struct cache_detail *, char *, int);
+static int         nametoid_parse(struct cache_detail *, char *, int);
-struct cache_detail nametoid_cache = {
+static struct cache_detail nametoid_cache = {
        .hash_size      = ENT_HASHMAX,
        .hash_table     = nametoid_table,
        .name           = "nfs4.nametoid",
@@ -330,7 +330,7 @@ struct cache_detail nametoid_cache = {
        .warn_no_listener = warn_no_idmapd,
 };
-int
+static int
 nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
 {
        struct ent ent, *res;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e8158741e8b5..d71f14517b9c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -45,6 +45,7 @@
 #include <linux/param.h>
 #include <linux/major.h>
 #include <linux/slab.h>
+#include <linux/file.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/nfsd/nfsd.h>
@@ -198,6 +199,11 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
        if (status)
                goto out;
        switch (open->op_claim_type) {
+                case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+                        status = nfserr_inval;
+                        if (open->op_create)
+                                goto out;
+                        /* fall through */
                case NFS4_OPEN_CLAIM_NULL:
                        /*
                         * (1) set CURRENT_FH to the file being opened,
@@ -220,7 +226,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
                        if (status)
                                goto out;
                        break;
-                case NFS4_OPEN_CLAIM_DELEGATE_CUR:
                case NFS4_OPEN_CLAIM_DELEGATE_PREV:
                        printk("NFSD: unsupported OPEN claim type %d\n",
                                open->op_claim_type);
@@ -473,26 +478,27 @@ static inline int
 nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
 {
        int status;
-        struct file *filp = NULL;
        /* no need to check permission - this will be done in nfsd_read() */
+        read->rd_filp = NULL;
        if (read->rd_offset >= OFFSET_MAX)
                return nfserr_inval;
        nfs4_lock_state();
        /* check stateid */
        if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
-                                        CHECK_FH | RD_STATE, &filp))) {
+                                CHECK_FH | RD_STATE, &read->rd_filp))) {
                dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
                goto out;
        }
+        if (read->rd_filp)
+                get_file(read->rd_filp);
        status = nfs_ok;
 out:
        nfs4_unlock_state();
        read->rd_rqstp = rqstp;
        read->rd_fhp = current_fh;
-        read->rd_filp = filp;
        return status;
 }
@@ -532,6 +538,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_rem
 {
        int status;
+        if (nfs4_in_grace())
+                return nfserr_grace;
        status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
        if (status == nfserr_symlink)
                return nfserr_notdir;
@@ -550,6 +558,9 @@ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
        if (!save_fh->fh_dentry)
                return status;
+        if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags
+                                        & NFSEXP_NOSUBTREECHECK))
+                return nfserr_grace;
        status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
                             rename->rn_snamelen, current_fh,
                             rename->rn_tname, rename->rn_tnamelen);
@@ -624,6 +635,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
                dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
                goto out;
        }
+        if (filp)
+                get_file(filp);
        nfs4_unlock_state();
        write->wr_bytes_written = write->wr_buflen;
@@ -635,6 +648,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
        status =  nfsd_write(rqstp, current_fh, filp, write->wr_offset,
                        write->wr_vec, write->wr_vlen, write->wr_buflen,
                        &write->wr_how_written);
+        if (filp)
+                fput(filp);
        if (status == nfserr_symlink)
                status = nfserr_inval;
@@ -923,6 +938,9 @@ encode_op:
                        nfs4_put_stateowner(replay_owner);
                        replay_owner = NULL;
                }
+                /* XXX Ugh, we need to get rid of this kind of special case: */
+                if (op->opnum == OP_READ && op->u.read.rd_filp)
+                        fput(op->u.read.rd_filp);
        }
 out:
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
new file mode 100644
index 000000000000..095f1740f3ae
--- /dev/null
+++ b/fs/nfsd/nfs4recover.c
@@ -0,0 +1,431 @@
+/*
+*  linux/fs/nfsd/nfs4recover.c
+*
+*  Copyright (c) 2004 The Regents of the University of Michigan.
+*  All rights reserved.
+*
+*  Andy Adamson <andros@citi.umich.edu>
+*
+*  Redistribution and use in source and binary forms, with or without
+*  modification, are permitted provided that the following conditions
+*  are met:
+*
+*  1. Redistributions of source code must retain the above copyright
+*     notice, this list of conditions and the following disclaimer.
+*  2. Redistributions in binary form must reproduce the above copyright
+*     notice, this list of conditions and the following disclaimer in the
+*     documentation and/or other materials provided with the distribution.
+*  3. Neither the name of the University nor the names of its
+*     contributors may be used to endorse or promote products derived
+*     from this software without specific prior written permission.
+*
+*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include <linux/param.h>
+#include <linux/file.h>
+#include <linux/namei.h>
+#include <asm/uaccess.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#define NFSDDBG_FACILITY                NFSDDBG_PROC
+/* Globals */
+/*
+ * Looked-up path of the recovery directory; filled in by path_lookup() in
+ * nfsd4_init_recdir() and released in nfsd4_shutdown_recdir().
+ */
+static struct nameidata rec_dir;
+/* Non-zero while rec_dir holds a successfully looked-up path. */
+static int rec_dir_init = 0;
+/*
+ * Store the current task's fsuid/fsgid in *saveuid / *savegid and switch
+ * both to 0.  Undo with nfs4_reset_user().
+ */
+static void
+nfs4_save_user(uid_t *saveuid, gid_t *savegid)
+{
+        *saveuid = current->fsuid;
+        current->fsuid = 0;
+        *savegid = current->fsgid;
+        current->fsgid = 0;
+}
+/* Put back the fsuid/fsgid values captured earlier by nfs4_save_user(). */
+static void
+nfs4_reset_user(uid_t saveuid, gid_t savegid)
+{
+        current->fsgid = savegid;
+        current->fsuid = saveuid;
+}
+/*
+ * Render the 16 bytes at md5 as 32 lowercase hex digits in out, followed by
+ * a terminating NUL; out must have room for 33 bytes.
+ */
+static void
+md5_to_hex(char *out, char *md5)
+{
+        static const char hexdigits[] = "0123456789abcdef";
+        int i;
+        for (i = 0; i < 16; i++) {
+                unsigned char byte = md5[i];
+                out[2 * i] = hexdigits[byte >> 4];
+                out[2 * i + 1] = hexdigits[byte & 0x0f];
+        }
+        out[32] = '\0';
+}
+/*
+ * Fill dname with the hex text of the "md5" digest of the client name in
+ * clname (see md5_to_hex() for the buffer size needed).
+ * Returns nfs_ok, or nfserr_resource if the digest transform or the digest
+ * buffer cannot be allocated.
+ */
+int
+nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
+{
+        struct xdr_netobj digest;
+        struct crypto_tfm *md5;
+        struct scatterlist sg[1];
+        dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
+                        clname->len, clname->data);
+        md5 = crypto_alloc_tfm("md5", 0);
+        if (md5 == NULL)
+                return nfserr_resource;
+        digest.len = crypto_tfm_alg_digestsize(md5);
+        digest.data = kmalloc(digest.len, GFP_KERNEL);
+        if (digest.data == NULL) {
+                crypto_free_tfm(md5);
+                return nfserr_resource;
+        }
+        /* The whole client name is hashed as a single scatterlist entry. */
+        sg[0].page = virt_to_page(clname->data);
+        sg[0].offset = offset_in_page(clname->data);
+        sg[0].length = clname->len;
+        crypto_digest_init(md5);
+        crypto_digest_update(md5, sg, 1);
+        crypto_digest_final(md5, digest.data);
+        md5_to_hex(dname, digest.data);
+        kfree(digest.data);
+        crypto_free_tfm(md5);
+        return nfs_ok;
+}
+/*
+ * Open dentry on the recovery directory's mount and call the file's fsync
+ * operation if it provides one.  Returns 0 on success, otherwise the error
+ * from dentry_open() or fsync; any failure is also reported via printk.
+ */
+static int
+nfsd4_rec_fsync(struct dentry *dentry)
+{
+        struct file *filp;
+        int status = nfs_ok;
+        dprintk("NFSD: nfs4_fsync_rec_dir\n");
+        filp = dentry_open(dget(dentry), mntget(rec_dir.mnt), O_RDWR);
+        if (!IS_ERR(filp)) {
+                if (filp->f_op && filp->f_op->fsync)
+                        status = filp->f_op->fsync(filp, filp->f_dentry, 0);
+                fput(filp);
+        } else {
+                status = PTR_ERR(filp);
+        }
+        if (status)
+                printk("nfsd4: unable to sync recovery directory\n");
+        return status;
+}
+/*
+ * Create the directory named clp->cl_recdir inside the recovery directory.
+ * On success clp->cl_firststate is set and the recovery directory is synced.
+ * Returns 0 without doing anything when no recovery directory is set up or
+ * cl_firststate is already set; -EEXIST when the name already exists;
+ * otherwise the status of the lookup, mkdir or sync.
+ */
+int
+nfsd4_create_clid_dir(struct nfs4_client *clp)
+{
+        char *dname = clp->cl_recdir;
+        struct dentry *dentry;
+        uid_t uid;
+        gid_t gid;
+        int status;
+        dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
+        if (!rec_dir_init || clp->cl_firststate)
+                return 0;
+        nfs4_save_user(&uid, &gid);
+        /* The parent's i_sem is held across both the lookup and the mkdir. */
+        down(&rec_dir.dentry->d_inode->i_sem);
+        dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
+        if (IS_ERR(dentry)) {
+                status = PTR_ERR(dentry);
+        } else {
+                if (dentry->d_inode) {
+                        dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
+                        status = -EEXIST;
+                } else {
+                        status = vfs_mkdir(rec_dir.dentry->d_inode, dentry,
+                                        S_IRWXU);
+                }
+                dput(dentry);
+        }
+        up(&rec_dir.dentry->d_inode->i_sem);
+        if (status == 0) {
+                clp->cl_firststate = 1;
+                status = nfsd4_rec_fsync(rec_dir.dentry);
+        }
+        nfs4_reset_user(uid, gid);
+        dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
+        return status;
+}
+/*
+ * Per-entry callback used by nfsd4_list_rec_dir(): called as
+ * f(directory, entry); a non-zero return stops the walk and is returned.
+ */
+typedef int (recdir_func)(struct dentry *, struct dentry *);
+/*
+ * One directory entry collected by nfsd4_build_dentrylist(): the dentry
+ * returned by lookup_one_len() plus its link in dentry_list_arg.dentries.
+ */
+struct dentry_list {
+        struct dentry *dentry;
+        struct list_head list;
+};
+/*
+ * Argument handed to nfsd4_build_dentrylist() through vfs_readdir():
+ * the list being built and the directory whose entries are looked up.
+ */
+struct dentry_list_arg {
+        struct list_head dentries;
+        struct dentry *parent;
+};
+/*
+ * Directory-read callback handed to vfs_readdir() by nfsd4_list_rec_dir().
+ * Looks up the entry "name" under dla->parent and adds it to dla->dentries.
+ * Entries accepted by isdotent() (presumably "." and "..") are skipped.
+ * Returns 0 on success, the lookup error, or -ENOMEM.  On every failure
+ * path no dentry reference is left behind.
+ */
+static int
+nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
+                loff_t offset, ino_t ino, unsigned int d_type)
+{
+        struct dentry_list_arg *dla = arg;
+        struct list_head *dentries = &dla->dentries;
+        struct dentry *parent = dla->parent;
+        struct dentry *dentry;
+        struct dentry_list *child;
+        if (name && isdotent(name, namlen))
+                return nfs_ok;
+        dentry = lookup_one_len(name, parent, namlen);
+        if (IS_ERR(dentry))
+                return PTR_ERR(dentry);
+        child = kmalloc(sizeof(*child), GFP_KERNEL);
+        if (child == NULL) {
+                /* The dentry never made it onto the list: drop our reference. */
+                dput(dentry);
+                return -ENOMEM;
+        }
+        child->dentry = dentry;
+        list_add(&child->list, dentries);
+        return 0;
+}
+/*
+ * Read directory dir and call f(dir, entry) for each entry collected by
+ * nfsd4_build_dentrylist().  Runs with fsuid/fsgid 0 (nfs4_save_user()).
+ * Returns 0 when no recovery directory is set up; otherwise the open error,
+ * the first non-zero status returned by f (which stops the walk), or, if
+ * no entries were collected, the vfs_readdir() status.  Note that once at
+ * least one entry has been processed the vfs_readdir() status is replaced
+ * by the status of f.
+ */
+static int
+nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
+{
+        struct file *filp;
+        struct dentry_list_arg dla = {
+                .parent = dir,
+        };
+        struct list_head *dentries = &dla.dentries;
+        struct dentry_list *child;
+        uid_t uid;
+        gid_t gid;
+        int status;
+        if (!rec_dir_init)
+                return 0;
+        /*
+         * Must be initialised before the first "goto out": the cleanup loop
+         * below walks this list even when dentry_open() fails.
+         */
+        INIT_LIST_HEAD(dentries);
+        nfs4_save_user(&uid, &gid);
+        filp = dentry_open(dget(dir), mntget(rec_dir.mnt),
+                        O_RDWR);
+        status = PTR_ERR(filp);
+        if (IS_ERR(filp))
+                goto out;
+        status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
+        fput(filp);
+        while (!list_empty(dentries)) {
+                child = list_entry(dentries->next, struct dentry_list, list);
+                status = f(dir, child->dentry);
+                if (status)
+                        goto out;
+                list_del(&child->list);
+                dput(child->dentry);
+                kfree(child);
+        }
+out:
+        /* Release whatever is still queued (everything, after a failure). */
+        while (!list_empty(dentries)) {
+                child = list_entry(dentries->next, struct dentry_list, list);
+                list_del(&child->list);
+                dput(child->dentry);
+                kfree(child);
+        }
+        nfs4_reset_user(uid, gid);
+        return status;
+}
+/*
+ * Unlink the entry dentry from directory dir, provided the entry is a
+ * regular file.  Returns -EINVAL (and logs) for anything else, otherwise
+ * the vfs_unlink() status.  dir's i_sem is taken around the unlink.
+ */
+static int
+nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
+{
+        int status;
+        /*
+         * It is the entry, not the containing directory, that has to be a
+         * regular file; a negative dentry is rejected as well.
+         */
+        if (!dentry->d_inode || !S_ISREG(dentry->d_inode->i_mode)) {
+                printk("nfsd4: non-file found in client recovery directory\n");
+                return -EINVAL;
+        }
+        down(&dir->d_inode->i_sem);
+        status = vfs_unlink(dir->d_inode, dentry);
+        up(&dir->d_inode->i_sem);
+        return status;
+}
+/*
+ * Remove the directory dentry from its parent dir.  The contents of dentry
+ * are first passed to nfsd4_remove_clid_file() on a best-effort basis (the
+ * result is ignored); the return value is that of vfs_rmdir().
+ */
+static int
+nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
+{
+        int err;
+        /*
+         * The directory is expected to be empty already; it is nevertheless
+         * cleared of regular files in case a future kernel created some.
+         */
+        nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
+        down(&dir->d_inode->i_sem);
+        err = vfs_rmdir(dir->d_inode, dentry);
+        up(&dir->d_inode->i_sem);
+        return err;
+}
+/*
+ * Look up "name" (namlen bytes) in the recovery directory and remove it
+ * with nfsd4_clear_clid_dir().  Returns the lookup error, -ENOENT when the
+ * name does not exist, or the removal status.
+ */
+static int
+nfsd4_unlink_clid_dir(char *name, int namlen)
+{
+        struct dentry *dentry;
+        int status;
+        dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
+        /*
+         * Hold the parent's i_sem for the lookup, as nfsd4_create_clid_dir()
+         * does.  It is dropped again before nfsd4_clear_clid_dir(), which
+         * takes the same semaphore itself.
+         */
+        down(&rec_dir.dentry->d_inode->i_sem);
+        dentry = lookup_one_len(name, rec_dir.dentry, namlen);
+        up(&rec_dir.dentry->d_inode->i_sem);
+        if (IS_ERR(dentry)) {
+                status = PTR_ERR(dentry);
+                return status;
+        }
+        status = -ENOENT;
+        if (!dentry->d_inode)
+                goto out;
+        status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
+out:
+        dput(dentry);
+        return status;
+}
+/*
+ * Remove the recovery-directory entry for clp and sync the recovery
+ * directory.  Does nothing when no recovery directory is set up or
+ * clp->cl_firststate is clear.  Failures are only logged.
+ */
+void
+nfsd4_remove_clid_dir(struct nfs4_client *clp)
+{
+        uid_t uid;
+        gid_t gid;
+        int err;
+        if (!rec_dir_init || !clp->cl_firststate)
+                return;
+        nfs4_save_user(&uid, &gid);
+        err = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
+        nfs4_reset_user(uid, gid);
+        if (!err)
+                err = nfsd4_rec_fsync(rec_dir.dentry);
+        if (!err)
+                return;
+        printk("NFSD: Failed to remove expired client state directory"
+                        " %.*s\n", HEXDIR_LEN, clp->cl_recdir);
+}
+/*
+ * nfsd4_list_rec_dir() callback: remove child from parent unless
+ * nfs4_has_reclaimed_state() reports true for its name.  A failed removal
+ * is logged; the return value is always nfs_ok so the walk continues.
+ */
+static int
+purge_old(struct dentry *parent, struct dentry *child)
+{
+        if (!nfs4_has_reclaimed_state(child->d_name.name) &&
+            nfsd4_clear_clid_dir(parent, child) != 0)
+                printk("failed to remove client recovery directory %s\n",
+                                child->d_name.name);
+        /* Keep trying, success or failure: */
+        return nfs_ok;
+}
+/*
+ * Run purge_old() over every entry of the recovery directory, then sync the
+ * directory.  Does nothing when no recovery directory is set up; failures
+ * are only logged.
+ */
+void
+nfsd4_recdir_purge_old(void)
+{
+        int err;
+        if (!rec_dir_init)
+                return;
+        err = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
+        if (!err)
+                err = nfsd4_rec_fsync(rec_dir.dentry);
+        if (err)
+                printk("nfsd4: failed to purge old clients from recovery"
+                        " directory %s\n", rec_dir.dentry->d_name.name);
+}
+/*
+ * nfsd4_list_rec_dir() callback: pass child's name to
+ * nfs4_client_to_reclaim() when it is exactly HEXDIR_LEN - 1 characters
+ * long; other names are logged and skipped.  Always returns nfs_ok.
+ */
+static int
+load_recdir(struct dentry *parent, struct dentry *child)
+{
+        if (child->d_name.len == HEXDIR_LEN - 1) {
+                nfs4_client_to_reclaim(child->d_name.name);
+        } else {
+                /* Keep trying; maybe the others are OK: */
+                printk("nfsd4: illegal name %s in recovery directory\n",
+                                child->d_name.name);
+        }
+        return nfs_ok;
+}
+/*
+ * Run load_recdir() over every entry of the recovery directory.
+ * Returns the nfsd4_list_rec_dir() status; a failure is also logged.
+ */
+int
+nfsd4_recdir_load(void)
+{
+        int err = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
+        if (err)
+                printk("nfsd4: failed loading clients from recovery"
+                        " directory %s\n", rec_dir.dentry->d_name.name);
+        return err;
+}
+/*
+ * Look up rec_dirname (following symlinks) and hold the resulting path in
+ * rec_dir as the recovery directory.  rec_dir_init is set only when the
+ * lookup succeeds; a missing directory is logged.  Must not be called while
+ * a recovery directory is already held (BUG_ON).
+ */
+void
+nfsd4_init_recdir(char *rec_dirname)
+{
+        uid_t                   uid = 0;
+        gid_t                   gid = 0;
+        int                     err;
+        printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
+                        rec_dirname);
+        BUG_ON(rec_dir_init);
+        nfs4_save_user(&uid, &gid);
+        err = path_lookup(rec_dirname, LOOKUP_FOLLOW, &rec_dir);
+        if (err == 0)
+                rec_dir_init = 1;
+        else if (err == -ENOENT)
+                printk("NFSD: recovery directory %s doesn't exist\n",
+                                rec_dirname);
+        nfs4_reset_user(uid, gid);
+}
+/*
+ * Release the recovery directory path held in rec_dir, if any, and mark it
+ * as no longer initialised.
+ */
+void
+nfsd4_shutdown_recdir(void)
+{
+        if (rec_dir_init) {
+                rec_dir_init = 0;
+                path_release(&rec_dir);
+        }
+}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 75e8b137580c..89e36526d7f2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -48,39 +48,32 @@
 #include <linux/nfs4.h>
 #include <linux/nfsd/state.h>
 #include <linux/nfsd/xdr4.h>
+#include <linux/namei.h>
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 /* Globals */
 static time_t lease_time = 90;     /* default lease time */
-static time_t old_lease_time = 90; /* past incarnation lease time */
+static time_t user_lease_time = 90;
-static u32 nfs4_reclaim_init = 0;
+static time_t boot_time;
-time_t boot_time;
+static int in_grace = 1;
-static time_t grace_end = 0;
 static u32 current_clientid = 1;
 static u32 current_ownerid = 1;
 static u32 current_fileid = 1;
 static u32 current_delegid = 1;
 static u32 nfs4_init;
-stateid_t zerostateid;             /* bits all 0 */
+static stateid_t zerostateid;             /* bits all 0 */
-stateid_t onestateid;              /* bits all 1 */
+static stateid_t onestateid;              /* bits all 1 */
-/* debug counters */
+#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
-u32 list_add_perfile = 0; 
+#define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
-u32 list_del_perfile = 0;
-u32 add_perclient = 0;
-u32 del_perclient = 0;
-u32 alloc_file = 0;
-u32 free_file = 0;
-u32 vfsopen = 0;
-u32 vfsclose = 0;
-u32 alloc_delegation= 0;
-u32 free_delegation= 0;
 /* forward declarations */
-struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
 static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
 static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
+static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
+static void nfs4_set_recdir(char *recdir);
 /* Locking:
 *
@@ -90,6 +83,11 @@ static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
 */
 static DECLARE_MUTEX(client_sema);
+static kmem_cache_t *stateowner_slab = NULL;
+static kmem_cache_t *file_slab = NULL;
+static kmem_cache_t *stateid_slab = NULL;
+static kmem_cache_t *deleg_slab = NULL;
 void
 nfs4_lock_state(void)
 {
@@ -118,16 +116,36 @@ opaque_hashval(const void *ptr, int nbytes)
 /* forward declarations */
 static void release_stateowner(struct nfs4_stateowner *sop);
 static void release_stateid(struct nfs4_stateid *stp, int flags);
-static void release_file(struct nfs4_file *fp);
 /*
 * Delegation state
 */
 /* recall_lock protects the del_recall_lru */
-spinlock_t recall_lock;
+static spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
 static struct list_head del_recall_lru;
+/*
+ * Release function passed to kref_put() by put_nfs4_file().
+ *
+ * Recovers the nfs4_file that embeds @kref (as fi_ref), removes it from
+ * the fi_hash list, drops the inode with iput() and hands the structure
+ * back to file_slab.
+ */
+static void
+free_nfs4_file(struct kref *kref)
+{
+        struct nfs4_file *fp;
+        fp = container_of(kref, struct nfs4_file, fi_ref);
+        list_del(&fp->fi_hash);
+        iput(fp->fi_inode);
+        kmem_cache_free(file_slab, fp);
+}
+/*
+ * Drop one reference on fi->fi_ref; free_nfs4_file() is the release
+ * function given to kref_put().
+ */
+static inline void
+put_nfs4_file(struct nfs4_file *fi)
+{
+        kref_put(&fi->fi_ref, free_nfs4_file);
+}
+/* Take one additional reference on fi->fi_ref. */
+static inline void
+get_nfs4_file(struct nfs4_file *fi)
+{
+        kref_get(&fi->fi_ref);
+}
 static struct nfs4_delegation *
 alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
 {
@@ -136,13 +154,14 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
        struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
        dprintk("NFSD alloc_init_deleg\n");
-        if ((dp = kmalloc(sizeof(struct nfs4_delegation),
+        dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
-                GFP_KERNEL)) == NULL)
+        if (dp == NULL)
                return dp;
-        INIT_LIST_HEAD(&dp->dl_del_perfile);
+        INIT_LIST_HEAD(&dp->dl_perfile);
-        INIT_LIST_HEAD(&dp->dl_del_perclnt);
+        INIT_LIST_HEAD(&dp->dl_perclnt);
        INIT_LIST_HEAD(&dp->dl_recall_lru);
        dp->dl_client = clp;
+        get_nfs4_file(fp);
        dp->dl_file = fp;
        dp->dl_flock = NULL;
        get_file(stp->st_vfs_file);
@@ -160,9 +179,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
                        current_fh->fh_handle.fh_size);
        dp->dl_time = 0;
        atomic_set(&dp->dl_count, 1);
-        list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
+        list_add(&dp->dl_perfile, &fp->fi_delegations);
-        list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
+        list_add(&dp->dl_perclnt, &clp->cl_delegations);
-        alloc_delegation++;
        return dp;
 }
@@ -171,8 +189,8 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
 {
        if (atomic_dec_and_test(&dp->dl_count)) {
                dprintk("NFSD: freeing dp %p\n",dp);
-                kfree(dp);
+                put_nfs4_file(dp->dl_file);
-                free_delegation++;
+                kmem_cache_free(deleg_slab, dp);
        }
 }
@@ -193,15 +211,14 @@ nfs4_close_delegation(struct nfs4_delegation *dp)
        if (dp->dl_flock)
                setlease(filp, F_UNLCK, &dp->dl_flock);
        nfsd_close(filp);
-        vfsclose++;
 }
 /* Called under the state lock. */
 static void
 unhash_delegation(struct nfs4_delegation *dp)
 {
-        list_del_init(&dp->dl_del_perfile);
+        list_del_init(&dp->dl_perfile);
-        list_del_init(&dp->dl_del_perclnt);
+        list_del_init(&dp->dl_perclnt);
        spin_lock(&recall_lock);
        list_del_init(&dp->dl_recall_lru);
        spin_unlock(&recall_lock);
@@ -220,8 +237,8 @@ unhash_delegation(struct nfs4_delegation *dp)
 #define clientid_hashval(id) \
        ((id) & CLIENT_HASH_MASK)
-#define clientstr_hashval(name, namelen) \
+#define clientstr_hashval(name) \
-        (opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK)
+        (opaque_hashval((name), 8) & CLIENT_HASH_MASK)
 /*
 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
 * used in reboot/reset lease grace period processing
@@ -331,11 +348,11 @@ expire_client(struct nfs4_client *clp)
        INIT_LIST_HEAD(&reaplist);
        spin_lock(&recall_lock);
-        while (!list_empty(&clp->cl_del_perclnt)) {
+        while (!list_empty(&clp->cl_delegations)) {
-                dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt);
+                dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
                dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
                                dp->dl_flock);
-                list_del_init(&dp->dl_del_perclnt);
+                list_del_init(&dp->dl_perclnt);
                list_move(&dp->dl_recall_lru, &reaplist);
        }
        spin_unlock(&recall_lock);
@@ -347,26 +364,26 @@ expire_client(struct nfs4_client *clp)
        list_del(&clp->cl_idhash);
        list_del(&clp->cl_strhash);
        list_del(&clp->cl_lru);
-        while (!list_empty(&clp->cl_perclient)) {
+        while (!list_empty(&clp->cl_openowners)) {
-                sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient);
+                sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
                release_stateowner(sop);
        }
        put_nfs4_client(clp);
 }
 static struct nfs4_client *
-create_client(struct xdr_netobj name) {
+create_client(struct xdr_netobj name, char *recdir) {
        struct nfs4_client *clp;
        if (!(clp = alloc_client(name)))
                goto out;
+        memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
        atomic_set(&clp->cl_count, 1);
        atomic_set(&clp->cl_callback.cb_set, 0);
-        clp->cl_callback.cb_parsed = 0;
        INIT_LIST_HEAD(&clp->cl_idhash);
        INIT_LIST_HEAD(&clp->cl_strhash);
-        INIT_LIST_HEAD(&clp->cl_perclient);
+        INIT_LIST_HEAD(&clp->cl_openowners);
-        INIT_LIST_HEAD(&clp->cl_del_perclnt);
+        INIT_LIST_HEAD(&clp->cl_delegations);
        INIT_LIST_HEAD(&clp->cl_lru);
 out:
        return clp;
@@ -392,11 +409,9 @@ copy_cred(struct svc_cred *target, struct svc_cred *source) {
        get_group_info(target->cr_group_info);
 }
-static int
+static inline int
-cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) {
+same_name(const char *n1, const char *n2) {
-        if (!n1 || !n2)
+        return 0 == memcmp(n1, n2, HEXDIR_LEN);
-                return 0;
-        return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len));
 }
 static int
@@ -446,7 +461,7 @@ check_name(struct xdr_netobj name) {
        return 1;
 }
-void
+static void
 add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
 {
        unsigned int idhashval;
@@ -458,7 +473,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
        clp->cl_time = get_seconds();
 }
-void
+static void
 move_to_confirmed(struct nfs4_client *clp)
 {
        unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
@@ -468,8 +483,7 @@ move_to_confirmed(struct nfs4_client *clp)
        list_del_init(&clp->cl_strhash);
        list_del_init(&clp->cl_idhash);
        list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
-        strhashval = clientstr_hashval(clp->cl_name.data, 
+        strhashval = clientstr_hashval(clp->cl_recdir);
-                        clp->cl_name.len);
        list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
        renew_client(clp);
 }
@@ -500,6 +514,30 @@ find_unconfirmed_client(clientid_t *clid)
        return NULL;
 }
+/*
+ * Walk bucket @hashval of conf_str_hashtbl and return the first client
+ * whose cl_recdir matches @dname according to same_name(), or NULL when
+ * the bucket holds no such client.
+ */
+static struct nfs4_client *
+find_confirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+        struct nfs4_client *cur;
+        struct nfs4_client *match = NULL;
+        list_for_each_entry(cur, &conf_str_hashtbl[hashval], cl_strhash) {
+                if (!same_name(cur->cl_recdir, dname))
+                        continue;
+                match = cur;
+                break;
+        }
+        return match;
+}
+/*
+ * Walk bucket @hashval of unconf_str_hashtbl and return the first client
+ * whose cl_recdir matches @dname according to same_name(), or NULL when
+ * the bucket holds no such client.
+ */
+static struct nfs4_client *
+find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
+{
+        struct nfs4_client *cur;
+        struct nfs4_client *match = NULL;
+        list_for_each_entry(cur, &unconf_str_hashtbl[hashval], cl_strhash) {
+                if (!same_name(cur->cl_recdir, dname))
+                        continue;
+                match = cur;
+                break;
+        }
+        return match;
+}
 /* a helper function for parse_callback */
 static int
 parse_octet(unsigned int *lenp, char **addrp)
@@ -534,7 +572,7 @@ parse_octet(unsigned int *lenp, char **addrp)
 }
 /* parse and set the setclientid ipv4 callback address */
-int
+static int
 parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
 {
        int temp = 0;
@@ -570,7 +608,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne
        return 1;
 }
-void
+static void
 gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
 {
        struct nfs4_callback *cb = &clp->cl_callback;
@@ -584,14 +622,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
                goto out_err;
        cb->cb_prog = se->se_callback_prog;
        cb->cb_ident = se->se_callback_ident;
-        cb->cb_parsed = 1;
        return;
 out_err:
        printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
                "will not receive delegations\n",
                clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-        cb->cb_parsed = 0;
        return;
 }
@@ -638,59 +674,43 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
        };
        nfs4_verifier           clverifier = setclid->se_verf;
        unsigned int            strhashval;
-        struct nfs4_client *    conf, * unconf, * new, * clp;
+        struct nfs4_client      *conf, *unconf, *new;
        int                     status;
+        char                    dname[HEXDIR_LEN];
        
        status = nfserr_inval;
        if (!check_name(clname))
                goto out;
+        status = nfs4_make_rec_clidname(dname, &clname);
+        if (status)
+                goto out;
        /* 
         * XXX The Duplicate Request Cache (DRC) has been checked (??)
         * We get here on a DRC miss.
         */
-        strhashval = clientstr_hashval(clname.data, clname.len);
+        strhashval = clientstr_hashval(dname);
-        conf = NULL;
        nfs4_lock_state();
-        list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) {
+        conf = find_confirmed_client_by_str(dname, strhashval);
-                if (!cmp_name(&clp->cl_name, &clname))
+        if (conf) {
-                        continue;
                /* 
                 * CASE 0:
                 * clname match, confirmed, different principal
                 * or different ip_address
                 */
                status = nfserr_clid_inuse;
-                if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) {
+                if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred)
-                        printk("NFSD: setclientid: string in use by client"
+                                || conf->cl_addr != ip_addr) {
-                        "(clientid %08x/%08x)\n",
-                        clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-                        goto out;
-                }
-                if (clp->cl_addr != ip_addr) { 
                        printk("NFSD: setclientid: string in use by client"
                        "(clientid %08x/%08x)\n",
-                        clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+                        conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id);
                        goto out;
                }
-                /* 
-                 * cl_name match from a previous SETCLIENTID operation
-                 * XXX check for additional matches?
-                 */
-                conf = clp;
-                break;
-        }
-        unconf = NULL;
-        list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
-                if (!cmp_name(&clp->cl_name, &clname))
-                        continue;
-                /* cl_name match from a previous SETCLIENTID operation */
-                unconf = clp;
-                break;
        }
+        unconf = find_unconfirmed_client_by_str(dname, strhashval);
        status = nfserr_resource;
        if (!conf) {
                /* 
@@ -699,7 +719,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
                 */
                if (unconf)
                        expire_client(unconf);
-                if (!(new = create_client(clname)))
+                new = create_client(clname, dname);
+                if (new == NULL)
                        goto out;
                copy_verf(new, &clverifier);
                new->cl_addr = ip_addr;
@@ -722,12 +743,16 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
                 * nfs4_client,  but with the new callback info and a 
                 * new cl_confirm
                 */
-                if ((unconf) && 
+                if (unconf) {
-                    cmp_verf(&unconf->cl_verifier, &conf->cl_verifier) &&
+                        /* Note this is removing unconfirmed {*x***},
-                     cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) {
+                         * which is stronger than RFC recommended {vxc**}.
-                                expire_client(unconf);
+                         * This has the advantage that there is at most
+                         * one {*x***} in either list at any time.
+                         */
+                        expire_client(unconf);
                }
-                if (!(new = create_client(clname)))
+                new = create_client(clname, dname);
+                if (new == NULL)
                        goto out;
                copy_verf(new,&conf->cl_verifier);
                new->cl_addr = ip_addr;
@@ -745,7 +770,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
                 * using input clverifier, clname, and callback info
                 * and generate a new cl_clientid and cl_confirm.
                 */
-                if (!(new = create_client(clname)))
+                new = create_client(clname, dname);
+                if (new == NULL)
                        goto out;
                copy_verf(new,&clverifier);
                new->cl_addr = ip_addr;
@@ -771,7 +797,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
                 * new cl_verifier and a new cl_confirm
                 */
                expire_client(unconf);
-                if (!(new = create_client(clname)))
+                new = create_client(clname, dname);
+                if (new == NULL)
                        goto out;
                copy_verf(new,&clverifier);
                new->cl_addr = ip_addr;
@@ -807,7 +834,7 @@ int
 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
 {
        u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
-        struct nfs4_client *clp, *conf = NULL, *unconf = NULL;
+        struct nfs4_client *conf, *unconf;
        nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
        clientid_t * clid = &setclientid_confirm->sc_clientid;
        int status;
@@ -820,102 +847,90 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
         */
        nfs4_lock_state();
-        clp = find_confirmed_client(clid);
-        if (clp) {
+        conf = find_confirmed_client(clid);
-                status = nfserr_inval;
+        unconf = find_unconfirmed_client(clid);
-                /* 
-                 * Found a record for this clientid. If the IP addresses
+        status = nfserr_clid_inuse;
-                 * don't match, return ERR_INVAL just as if the record had
+        if (conf && conf->cl_addr != ip_addr)
-                 * not been found.
+                goto out;
-                 */
+        if (unconf && unconf->cl_addr != ip_addr)
-                if (clp->cl_addr != ip_addr) { 
+                goto out;
-                        printk("NFSD: setclientid: string in use by client"
-                        "(clientid %08x/%08x)\n",
-                        clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-                        goto out;
-                }
-                conf = clp;
-        }
-        clp = find_unconfirmed_client(clid);
-        if (clp) {
-                status = nfserr_inval;
-                if (clp->cl_addr != ip_addr) { 
-                        printk("NFSD: setclientid: string in use by client"
-                        "(clientid %08x/%08x)\n",
-                        clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
-                        goto out;
-                }
-                unconf = clp;
-        }
-        /* CASE 1: 
-        * unconf record that matches input clientid and input confirm.
-        * conf record that matches input clientid.
-        * conf  and unconf records match names, verifiers 
-        */
        if ((conf && unconf) && 
            (cmp_verf(&unconf->cl_confirm, &confirm)) &&
            (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
-            (cmp_name(&conf->cl_name,&unconf->cl_name))  &&
+            (same_name(conf->cl_recdir,unconf->cl_recdir))  &&
            (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
+                /* CASE 1:
+                * unconf record that matches input clientid and input confirm.
+                * conf record that matches input clientid.
+                * conf and unconf records match names, verifiers
+                */
                if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 
                        status = nfserr_clid_inuse;
                else {
-                        expire_client(conf);
+                        /* XXX: We just turn off callbacks until we can handle
-                        clp = unconf;
+                          * change request correctly. */
-                        move_to_confirmed(unconf);
+                        atomic_set(&conf->cl_callback.cb_set, 0);
+                        gen_confirm(conf);
+                        expire_client(unconf);
                        status = nfs_ok;
                }
-                goto out;
+        } else if ((conf && !unconf) ||
-        } 
-        /* CASE 2:
-         * conf record that matches input clientid.
-         * if unconf record that matches input clientid, then unconf->cl_name
-         * or unconf->cl_verifier don't match the conf record.
-         */
-        if ((conf && !unconf) || 
            ((conf && unconf) && 
             (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
-              !cmp_name(&conf->cl_name, &unconf->cl_name)))) {
+              !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
-                if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
+                /* CASE 2:
+                 * conf record that matches input clientid.
+                 * if unconf record matches input clientid, then
+                 * unconf->cl_name or unconf->cl_verifier don't match the
+                 * conf record.
+                 */
+                if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred))
                        status = nfserr_clid_inuse;
-                } else {
+                else
-                        clp = conf;
                        status = nfs_ok;
-                }
+        } else if (!conf && unconf
-                goto out;
+                        && cmp_verf(&unconf->cl_confirm, &confirm)) {
-        }
+                /* CASE 3:
-        /* CASE 3:
+                 * conf record not found.
-         * conf record not found.
+                 * unconf record found.
-         * unconf record found. 
+                 * unconf->cl_confirm matches input confirm
-         * unconf->cl_confirm matches input confirm
+                 */
-         */ 
-        if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
                if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
                        status = nfserr_clid_inuse;
                } else {
-                        status = nfs_ok;
+                        unsigned int hash =
-                        clp = unconf;
+                                clientstr_hashval(unconf->cl_recdir);
+                        conf = find_confirmed_client_by_str(unconf->cl_recdir,
+                                                                        hash);
+                        if (conf) {
+                                nfsd4_remove_clid_dir(conf);
+                                expire_client(conf);
+                        }
                        move_to_confirmed(unconf);
+                        conf = unconf;
+                        status = nfs_ok;
                }
-                goto out;
+        } else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm)))
-        }
+            && (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm,
-        /* CASE 4:
+                                                                &confirm)))) {
-         * conf record not found, or if conf, then conf->cl_confirm does not
+                /* CASE 4:
-         * match input confirm.
+                 * conf record not found, or if conf, conf->cl_confirm does not
-         * unconf record not found, or if unconf, then unconf->cl_confirm 
+                 * match input confirm.
-         * does not match input confirm.
+                 * unconf record not found, or if unconf, unconf->cl_confirm
-         */
+                 * does not match input confirm.
-        if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) &&
+                 */
-            (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) {
                status = nfserr_stale_clientid;
-                goto out;
+        } else {
+                /* check that we have hit one of the cases...*/
+                status = nfserr_clid_inuse;
        }
-        /* check that we have hit one of the cases...*/
-        status = nfserr_inval;
-        goto out;
 out:
        if (!status)
-                nfsd4_probe_callback(clp);
+                nfsd4_probe_callback(conf);
        nfs4_unlock_state();
        return status;
 }
@@ -961,60 +976,65 @@ alloc_init_file(struct inode *ino)
        struct nfs4_file *fp;
        unsigned int hashval = file_hashval(ino);
-        if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) {
+        fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
+        if (fp) {
+                kref_init(&fp->fi_ref);
                INIT_LIST_HEAD(&fp->fi_hash);
-                INIT_LIST_HEAD(&fp->fi_perfile);
+                INIT_LIST_HEAD(&fp->fi_stateids);
-                INIT_LIST_HEAD(&fp->fi_del_perfile);
+                INIT_LIST_HEAD(&fp->fi_delegations);
                list_add(&fp->fi_hash, &file_hashtbl[hashval]);
                fp->fi_inode = igrab(ino);
                fp->fi_id = current_fileid++;
-                alloc_file++;
                return fp;
        }
        return NULL;
 }
 static void
-release_all_files(void)
+nfsd4_free_slab(kmem_cache_t **slab)
 {
-        int i;
+        int status;
-        struct nfs4_file *fp;
-        for (i=0;i<FILE_HASH_SIZE;i++) {
+        if (*slab == NULL)
-                while (!list_empty(&file_hashtbl[i])) {
+                return;
-                        fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash);
+        status = kmem_cache_destroy(*slab);
-                        /* this should never be more than once... */
+        *slab = NULL;
-                        if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) {
+        WARN_ON(status);
-                                printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
-                        }
-                        release_file(fp);
-                }
-        }
 }
-kmem_cache_t *stateowner_slab = NULL;
+static void
+nfsd4_free_slabs(void)
+{
+        nfsd4_free_slab(&stateowner_slab);
+        nfsd4_free_slab(&file_slab);
+        nfsd4_free_slab(&stateid_slab);
+        nfsd4_free_slab(&deleg_slab);
+}
 static int
 nfsd4_init_slabs(void)
 {
        stateowner_slab = kmem_cache_create("nfsd4_stateowners",
                        sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
-        if (stateowner_slab == NULL) {
+        if (stateowner_slab == NULL)
-                dprintk("nfsd4: out of memory while initializing nfsv4\n");
+                goto out_nomem;
-                return -ENOMEM;
+        file_slab = kmem_cache_create("nfsd4_files",
-        }
+                        sizeof(struct nfs4_file), 0, 0, NULL, NULL);
+        if (file_slab == NULL)
+                goto out_nomem;
+        stateid_slab = kmem_cache_create("nfsd4_stateids",
+                        sizeof(struct nfs4_stateid), 0, 0, NULL, NULL);
+        if (stateid_slab == NULL)
+                goto out_nomem;
+        deleg_slab = kmem_cache_create("nfsd4_delegations",
+                        sizeof(struct nfs4_delegation), 0, 0, NULL, NULL);
+        if (deleg_slab == NULL)
+                goto out_nomem;
        return 0;
-}
+out_nomem:
+        nfsd4_free_slabs();
-static void
+        dprintk("nfsd4: out of memory while initializing nfsv4\n");
-nfsd4_free_slabs(void)
+        return -ENOMEM;
-{
-        int status = 0;
-        if (stateowner_slab)
-                status = kmem_cache_destroy(stateowner_slab);
-        stateowner_slab = NULL;
-        BUG_ON(status);
 }
 void
@@ -1055,14 +1075,13 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
        INIT_LIST_HEAD(&sop->so_idhash);
        INIT_LIST_HEAD(&sop->so_strhash);
        INIT_LIST_HEAD(&sop->so_perclient);
-        INIT_LIST_HEAD(&sop->so_perfilestate);
+        INIT_LIST_HEAD(&sop->so_stateids);
-        INIT_LIST_HEAD(&sop->so_perlockowner);  /* not used */
+        INIT_LIST_HEAD(&sop->so_perstateid);  /* not used */
        INIT_LIST_HEAD(&sop->so_close_lru);
        sop->so_time = 0;
        list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
        list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
-        list_add(&sop->so_perclient, &clp->cl_perclient);
+        list_add(&sop->so_perclient, &clp->cl_openowners);
-        add_perclient++;
        sop->so_is_open_owner = 1;
        sop->so_id = current_ownerid++;
        sop->so_client = clp;
@@ -1080,10 +1099,10 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp)
 {
        struct nfs4_stateowner *lock_sop;
-        while (!list_empty(&open_stp->st_perlockowner)) {
+        while (!list_empty(&open_stp->st_lockowners)) {
-                lock_sop = list_entry(open_stp->st_perlockowner.next,
+                lock_sop = list_entry(open_stp->st_lockowners.next,
-                                struct nfs4_stateowner, so_perlockowner);
+                                struct nfs4_stateowner, so_perstateid);
-                /* list_del(&open_stp->st_perlockowner);  */
+                /* list_del(&open_stp->st_lockowners);  */
                BUG_ON(lock_sop->so_is_open_owner);
                release_stateowner(lock_sop);
        }
@@ -1096,14 +1115,12 @@ unhash_stateowner(struct nfs4_stateowner *sop)
        list_del(&sop->so_idhash);
        list_del(&sop->so_strhash);
-        if (sop->so_is_open_owner) {
+        if (sop->so_is_open_owner)
                list_del(&sop->so_perclient);
-                del_perclient++;
+        list_del(&sop->so_perstateid);
-        }
+        while (!list_empty(&sop->so_stateids)) {
-        list_del(&sop->so_perlockowner);
+                stp = list_entry(sop->so_stateids.next,
-        while (!list_empty(&sop->so_perfilestate)) {
+                        struct nfs4_stateid, st_perstateowner);
-                stp = list_entry(sop->so_perfilestate.next, 
-                        struct nfs4_stateid, st_perfilestate);
                if (sop->so_is_open_owner)
                        release_stateid(stp, OPEN_STATE);
                else
@@ -1125,14 +1142,14 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
        unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
        INIT_LIST_HEAD(&stp->st_hash);
-        INIT_LIST_HEAD(&stp->st_perfilestate);
+        INIT_LIST_HEAD(&stp->st_perstateowner);
-        INIT_LIST_HEAD(&stp->st_perlockowner);
+        INIT_LIST_HEAD(&stp->st_lockowners);
        INIT_LIST_HEAD(&stp->st_perfile);
        list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
-        list_add(&stp->st_perfilestate, &sop->so_perfilestate);
+        list_add(&stp->st_perstateowner, &sop->so_stateids);
-        list_add_perfile++;
+        list_add(&stp->st_perfile, &fp->fi_stateids);
-        list_add(&stp->st_perfile, &fp->fi_perfile);
        stp->st_stateowner = sop;
+        get_nfs4_file(fp);
        stp->st_file = fp;
        stp->st_stateid.si_boot = boot_time;
        stp->st_stateid.si_stateownerid = sop->so_id;
@@ -1150,30 +1167,20 @@ release_stateid(struct nfs4_stateid *stp, int flags)
        struct file *filp = stp->st_vfs_file;
        list_del(&stp->st_hash);
-        list_del_perfile++;
        list_del(&stp->st_perfile);
-        list_del(&stp->st_perfilestate);
+        list_del(&stp->st_perstateowner);
        if (flags & OPEN_STATE) {
                release_stateid_lockowners(stp);
                stp->st_vfs_file = NULL;
                nfsd_close(filp);
-                vfsclose++;
        } else if (flags & LOCK_STATE)
                locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
-        kfree(stp);
+        put_nfs4_file(stp->st_file);
+        kmem_cache_free(stateid_slab, stp);
        stp = NULL;
 }
 static void
-release_file(struct nfs4_file *fp)
-{
-        free_file++;
-        list_del(&fp->fi_hash);
-        iput(fp->fi_inode);
-        kfree(fp);
-}       
-void
 move_to_close_lru(struct nfs4_stateowner *sop)
 {
        dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
@@ -1183,11 +1190,10 @@ move_to_close_lru(struct nfs4_stateowner *sop)
        sop->so_time = get_seconds();
 }
-void
+static void
 release_state_owner(struct nfs4_stateid *stp, int flag)
 {
        struct nfs4_stateowner *sop = stp->st_stateowner;
-        struct nfs4_file *fp = stp->st_file;
        dprintk("NFSD: release_state_owner\n");
        release_stateid(stp, flag);
@@ -1196,12 +1202,8 @@ release_state_owner(struct nfs4_stateid *stp, int flag)
         * released by the laundromat service after the lease period
         * to enable us to handle CLOSE replay
         */
-        if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
+        if (sop->so_confirmed && list_empty(&sop->so_stateids))
                move_to_close_lru(sop);
-        /* unused nfs4_file's are releseed. XXX slab cache? */
-        if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
-                release_file(fp);
-        }
 }
 static int
@@ -1231,8 +1233,10 @@ find_file(struct inode *ino)
        struct nfs4_file *fp;
        list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
-                if (fp->fi_inode == ino)
+                if (fp->fi_inode == ino) {
+                        get_nfs4_file(fp);
                        return fp;
+                }
        }
        return NULL;
 }
@@ -1240,7 +1244,7 @@ find_file(struct inode *ino)
 #define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0)
 #define TEST_DENY(x) ((x >= 0 || x < 5)?1:0)
-void
+static void
 set_access(unsigned int *access, unsigned long bmap) {
        int i;
@@ -1251,7 +1255,7 @@ set_access(unsigned int *access, unsigned long bmap) {
        }
 }
-void
+static void
 set_deny(unsigned int *deny, unsigned long bmap) {
        int i;
@@ -1277,25 +1281,30 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
 * Called to check deny when READ with all zero stateid or
 * WRITE with all zero or all one stateid
 */
-int
+static int
 nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
 {
        struct inode *ino = current_fh->fh_dentry->d_inode;
        struct nfs4_file *fp;
        struct nfs4_stateid *stp;
+        int ret;
        dprintk("NFSD: nfs4_share_conflict\n");
        fp = find_file(ino);
-        if (fp) {
+        if (!fp)
+                return nfs_ok;
+        ret = nfserr_share_denied;
        /* Search for conflicting share reservations */
-                list_for_each_entry(stp, &fp->fi_perfile, st_perfile) {
+        list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
-                        if (test_bit(deny_type, &stp->st_deny_bmap) ||
+                if (test_bit(deny_type, &stp->st_deny_bmap) ||
-                            test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
+                    test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
-                                return nfserr_share_denied;
+                        goto out;
-                }
        }
-        return nfs_ok;
+        ret = nfs_ok;
+out:
+        put_nfs4_file(fp);
+        return ret;
 }
 static inline void
@@ -1427,7 +1436,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
                return -EAGAIN;
 }
-struct lock_manager_operations nfsd_lease_mng_ops = {
+static struct lock_manager_operations nfsd_lease_mng_ops = {
        .fl_break = nfsd_break_deleg_cb,
        .fl_release_private = nfsd_release_deleg_cb,
        .fl_copy_lock = nfsd_copy_lock_deleg_cb,
@@ -1526,6 +1535,51 @@ out:
        return status;
 }
+static inline int
+nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
+{
+        if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
+                return nfserr_openmode;
+        else
+                return nfs_ok;
+}
+static struct nfs4_delegation *
+find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
+{
+        struct nfs4_delegation *dp;
+        list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
+                if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
+                        return dp;
+        }
+        return NULL;
+}
+static int
+nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
+                struct nfs4_delegation **dp)
+{
+        int flags;
+        int status = nfserr_bad_stateid;
+        *dp = find_delegation_file(fp, &open->op_delegate_stateid);
+        if (*dp == NULL)
+                goto out;
+        flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
+                                                RD_STATE : WR_STATE;
+        status = nfs4_check_delegmode(*dp, flags);
+        if (status)
+                *dp = NULL;
+out:
+        if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR)
+                return nfs_ok;
+        if (status)
+                return status;
+        open->op_stateowner->so_confirmed = 1;
+        return nfs_ok;
+}
 static int
 nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
 {
@@ -1533,7 +1587,7 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_state
        int status = nfserr_share_denied;
        struct nfs4_stateowner *sop = open->op_stateowner;
-        list_for_each_entry(local, &fp->fi_perfile, st_perfile) {
+        list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
                /* ignore lock owners */
                if (local->st_stateowner->so_is_open_owner == 0)
                        continue;
@@ -1549,25 +1603,37 @@ out:
        return status;
 }
+static inline struct nfs4_stateid *
+nfs4_alloc_stateid(void)
+{
+        return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
+}
 static int
 nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
+                struct nfs4_delegation *dp,
                struct svc_fh *cur_fh, int flags)
 {
        struct nfs4_stateid *stp;
-        int status;
-        stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL);
+        stp = nfs4_alloc_stateid();
        if (stp == NULL)
                return nfserr_resource;
-        status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &stp->st_vfs_file);
+        if (dp) {
-        if (status) {
+                get_file(dp->dl_vfs_file);
-                if (status == nfserr_dropit)
+                stp->st_vfs_file = dp->dl_vfs_file;
-                        status = nfserr_jukebox;
+        } else {
-                kfree(stp);
+                int status;
-                return status;
+                status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
+                                &stp->st_vfs_file);
+                if (status) {
+                        if (status == nfserr_dropit)
+                                status = nfserr_jukebox;
+                        kmem_cache_free(stateid_slab, stp);
+                        return status;
+                }
        }
-        vfsopen++;
        *stpp = stp;
        return 0;
 }
@@ -1628,6 +1694,7 @@ nfs4_set_claim_prev(struct nfsd4_open *open, int *status)
                        *status = nfserr_reclaim_bad;
                else {
                        open->op_stateowner->so_confirmed = 1;
+                        open->op_stateowner->so_client->cl_firststate = 1;
                        open->op_stateowner->so_seqid--;
                }
        }
@@ -1646,14 +1713,30 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
        int status, flag = 0;
        flag = NFS4_OPEN_DELEGATE_NONE;
-        if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL
+        open->op_recall = 0;
-             || !atomic_read(&cb->cb_set) || !sop->so_confirmed)
+        switch (open->op_claim_type) {
-                goto out;
+                case NFS4_OPEN_CLAIM_PREVIOUS:
+                        if (!atomic_read(&cb->cb_set))
-        if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+                                open->op_recall = 1;
-                flag = NFS4_OPEN_DELEGATE_WRITE;
+                        flag = open->op_delegate_type;
-        else
+                        if (flag == NFS4_OPEN_DELEGATE_NONE)
-                flag = NFS4_OPEN_DELEGATE_READ;
+                                goto out;
+                        break;
+                case NFS4_OPEN_CLAIM_NULL:
+                        /* Let's not give out any delegations till everyone's
+                         * had the chance to reclaim theirs.... */
+                        if (nfs4_in_grace())
+                                goto out;
+                        if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
+                                goto out;
+                        if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+                                flag = NFS4_OPEN_DELEGATE_WRITE;
+                        else
+                                flag = NFS4_OPEN_DELEGATE_READ;
+                        break;
+                default:
+                        goto out;
+        }
        dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
        if (dp == NULL) {
@@ -1687,6 +1770,10 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
                     dp->dl_stateid.si_fileid,
                     dp->dl_stateid.si_generation);
 out:
+        if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
+                        && flag == NFS4_OPEN_DELEGATE_NONE
+                        && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
+                printk("NFSD: WARNING: refusing delegation reclaim\n");
        open->op_delegate_type = flag;
 }
@@ -1699,6 +1786,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
        struct nfs4_file *fp = NULL;
        struct inode *ino = current_fh->fh_dentry->d_inode;
        struct nfs4_stateid *stp = NULL;
+        struct nfs4_delegation *dp = NULL;
        int status;
        status = nfserr_inval;
@@ -1713,7 +1801,13 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
        if (fp) {
                if ((status = nfs4_check_open(fp, open, &stp)))
                        goto out;
+                status = nfs4_check_deleg(fp, open, &dp);
+                if (status)
+                        goto out;
        } else {
+                status = nfserr_bad_stateid;
+                if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
+                        goto out;
                status = nfserr_resource;
                fp = alloc_init_file(ino);
                if (fp == NULL)
@@ -1736,7 +1830,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
                        flags = MAY_WRITE;
                else
                        flags = MAY_READ;
-                if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags)))
+                status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
+                if (status)
                        goto out;
                init_stateid(stp, fp, open);
                status = nfsd4_truncate(rqstp, current_fh, open);
@@ -1759,10 +1854,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
                    stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
                    stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
 out:
-        /* take the opportunity to clean up unused state */
+        if (fp)
-        if (fp && list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile))
+                put_nfs4_file(fp);
-                release_file(fp);
        /* CLAIM_PREVIOUS has different error returns */
        nfs4_set_claim_prev(open, &status);
        /*
@@ -1775,6 +1868,7 @@ out:
        return status;
 }
+static struct workqueue_struct *laundry_wq;
 static struct work_struct laundromat_work;
 static void laundromat_main(void *);
 static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
@@ -1800,7 +1894,7 @@ nfsd4_renew(clientid_t *clid)
        }
        renew_client(clp);
        status = nfserr_cb_path_down;
-        if (!list_empty(&clp->cl_del_perclnt)
+        if (!list_empty(&clp->cl_delegations)
                        && !atomic_read(&clp->cl_callback.cb_set))
                goto out;
        status = nfs_ok;
@@ -1809,7 +1903,15 @@ out:
        return status;
 }
-time_t
+static void
+end_grace(void)
+{
+        dprintk("NFSD: end of grace period\n");
+        nfsd4_recdir_purge_old();
+        in_grace = 0;
+}
+static time_t
 nfs4_laundromat(void)
 {
        struct nfs4_client *clp;
@@ -1823,6 +1925,8 @@ nfs4_laundromat(void)
        nfs4_lock_state();
        dprintk("NFSD: laundromat service - starting\n");
+        if (in_grace)
+                end_grace();
        list_for_each_safe(pos, next, &client_lru) {
                clp = list_entry(pos, struct nfs4_client, cl_lru);
                if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -1833,6 +1937,7 @@ nfs4_laundromat(void)
                }
                dprintk("NFSD: purging unused client (clientid %08x)\n",
                        clp->cl_clientid.cl_id);
+                nfsd4_remove_clid_dir(clp);
                expire_client(clp);
        }
        INIT_LIST_HEAD(&reaplist);
@@ -1882,13 +1987,13 @@ laundromat_main(void *not_used)
        t = nfs4_laundromat();
        dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
-        schedule_delayed_work(&laundromat_work, t*HZ);
+        queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
 }
 /* search ownerid_hashtbl[] and close_lru for stateid owner
 * (stateid->si_stateownerid)
 */
-struct nfs4_stateowner *
+static struct nfs4_stateowner *
 find_openstateowner_id(u32 st_id, int flags) {
        struct nfs4_stateowner *local = NULL;
@@ -1949,15 +2054,6 @@ out:
 }
 static inline int
-nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
-{
-        if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
-                return nfserr_openmode;
-        else
-                return nfs_ok;
-}
-static inline int
 check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
 {
        /* Trying to call delegreturn with a special stateid? Yuch: */
@@ -2071,7 +2167,7 @@ out:
 /* 
 * Checks for sequence id mutating operations. 
 */
-int
+static int
 nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid)
 {
        int status;
@@ -2230,6 +2326,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
                         stp->st_stateid.si_stateownerid,
                         stp->st_stateid.si_fileid,
                         stp->st_stateid.si_generation);
+        nfsd4_create_clid_dir(sop->so_client);
 out:
        if (oc->oc_stateowner)
                nfs4_get_stateowner(oc->oc_stateowner);
@@ -2387,7 +2485,7 @@ static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
 static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
 static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
-struct nfs4_stateid *
+static struct nfs4_stateid *
 find_stateid(stateid_t *stid, int flags)
 {
        struct nfs4_stateid *local = NULL;
@@ -2419,25 +2517,19 @@ find_stateid(stateid_t *stid, int flags)
 static struct nfs4_delegation *
 find_delegation_stateid(struct inode *ino, stateid_t *stid)
 {
-        struct nfs4_delegation *dp = NULL;
+        struct nfs4_file *fp;
-        struct nfs4_file *fp = NULL;
+        struct nfs4_delegation *dl;
-        u32 st_id;
        dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
                    stid->si_boot, stid->si_stateownerid,
                    stid->si_fileid, stid->si_generation);
-        st_id = stid->si_stateownerid;
        fp = find_file(ino);
-        if (fp) {
+        if (!fp)
-                list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) {
+                return NULL;
-                        if(dp->dl_stateid.si_stateownerid == st_id) {
+        dl = find_delegation_file(fp, stid);
-                                dprintk("NFSD: find_delegation dp %p\n",dp);
+        put_nfs4_file(fp);
-                                return dp;
+        return dl;
-                        }
-                }
-        }
-        return NULL;
 }
 /*
@@ -2457,7 +2549,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
                lock->fl_end = OFFSET_MAX;
 }
-int
+static int
 nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval)
 {
        struct nfs4_stateowner *local = NULL;
@@ -2498,22 +2590,6 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
 }
 static struct nfs4_stateowner *
-find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid)
-{
-        struct nfs4_stateowner *local = NULL;
-        int i;
-        for (i = 0; i < LOCK_HASH_SIZE; i++) {
-                list_for_each_entry(local, &lock_ownerid_hashtbl[i], so_idhash) {
-                        if (!cmp_owner_str(local, owner, clid))
-                                continue;
-                        return local;
-                }
-        }
-        return NULL;
-}
-static struct nfs4_stateowner *
 find_lockstateowner_str(struct inode *inode, clientid_t *clid,
                struct xdr_netobj *owner)
 {
@@ -2548,13 +2624,13 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
        INIT_LIST_HEAD(&sop->so_idhash);
        INIT_LIST_HEAD(&sop->so_strhash);
        INIT_LIST_HEAD(&sop->so_perclient);
-        INIT_LIST_HEAD(&sop->so_perfilestate);
+        INIT_LIST_HEAD(&sop->so_stateids);
-        INIT_LIST_HEAD(&sop->so_perlockowner);
+        INIT_LIST_HEAD(&sop->so_perstateid);
        INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
        sop->so_time = 0;
        list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
        list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
-        list_add(&sop->so_perlockowner, &open_stp->st_perlockowner);
+        list_add(&sop->so_perstateid, &open_stp->st_lockowners);
        sop->so_is_open_owner = 0;
        sop->so_id = current_ownerid++;
        sop->so_client = clp;
@@ -2567,24 +2643,24 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
        return sop;
 }
-struct nfs4_stateid *
+static struct nfs4_stateid *
 alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
 {
        struct nfs4_stateid *stp;
        unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
-        if ((stp = kmalloc(sizeof(struct nfs4_stateid), 
+        stp = nfs4_alloc_stateid();
-                                        GFP_KERNEL)) == NULL)
+        if (stp == NULL)
                goto out;
        INIT_LIST_HEAD(&stp->st_hash);
        INIT_LIST_HEAD(&stp->st_perfile);
-        INIT_LIST_HEAD(&stp->st_perfilestate);
+        INIT_LIST_HEAD(&stp->st_perstateowner);
-        INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */
+        INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
        list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
-        list_add(&stp->st_perfile, &fp->fi_perfile);
+        list_add(&stp->st_perfile, &fp->fi_stateids);
-        list_add_perfile++;
+        list_add(&stp->st_perstateowner, &sop->so_stateids);
-        list_add(&stp->st_perfilestate, &sop->so_perfilestate);
        stp->st_stateowner = sop;
+        get_nfs4_file(fp);
        stp->st_file = fp;
        stp->st_stateid.si_boot = boot_time;
        stp->st_stateid.si_stateownerid = sop->so_id;
@@ -2598,7 +2674,7 @@ out:
        return stp;
 }
-int
+static int
 check_lock_length(u64 offset, u64 length)
 {
        return ((length == 0)  || ((length != ~(u64)0) &&
@@ -2611,7 +2687,7 @@ check_lock_length(u64 offset, u64 length)
 int
 nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
 {
-        struct nfs4_stateowner *lock_sop = NULL, *open_sop = NULL;
+        struct nfs4_stateowner *open_sop = NULL;
        struct nfs4_stateid *lock_stp;
        struct file *filp;
        struct file_lock file_lock;
@@ -2670,16 +2746,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
                strhashval = lock_ownerstr_hashval(fp->fi_inode, 
                                open_sop->so_client->cl_clientid.cl_id, 
                                &lock->v.new.owner);
-                /* 
+                /* XXX: Do we need to check for duplicate stateowners on
-                 * If we already have this lock owner, the client is in 
+                 * the same file, or should they just be allowed (and
-                 * error (or our bookeeping is wrong!) 
+                 * create new stateids)? */
-                 * for asking for a 'new lock'.
-                 */
-                status = nfserr_bad_stateid;
-                lock_sop = find_lockstateowner(&lock->v.new.owner,
-                                                &lock->v.new.clientid);
-                if (lock_sop)
-                        goto out;
                status = nfserr_resource;
                if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
                        goto out;
@@ -2970,8 +3039,11 @@ int
 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
 {
        clientid_t *clid = &rlockowner->rl_clientid;
-        struct nfs4_stateowner *local = NULL;
+        struct nfs4_stateowner *sop;
+        struct nfs4_stateid *stp;
        struct xdr_netobj *owner = &rlockowner->rl_owner;
+        struct list_head matches;
+        int i;
        int status;
        dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -2987,22 +3059,32 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
        nfs4_lock_state();
-        status = nfs_ok;
+        status = nfserr_locks_held;
-        local = find_lockstateowner(owner, clid);
+        /* XXX: we're doing a linear search through all the lockowners.
-        if (local) {
+         * Yipes!  For now we'll just hope clients aren't really using
-                struct nfs4_stateid *stp;
+         * release_lockowner much, but eventually we have to fix these
+         * data structures. */
-                /* check for any locks held by any stateid
+        INIT_LIST_HEAD(&matches);
-                 * associated with the (lock) stateowner */
+        for (i = 0; i < LOCK_HASH_SIZE; i++) {
-                status = nfserr_locks_held;
+                list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
-                list_for_each_entry(stp, &local->so_perfilestate,
+                        if (!cmp_owner_str(sop, owner, clid))
-                                st_perfilestate) {
+                                continue;
-                        if (check_for_locks(stp->st_vfs_file, local))
+                        list_for_each_entry(stp, &sop->so_stateids,
-                                goto out;
+                                        st_perstateowner) {
+                                if (check_for_locks(stp->st_vfs_file, sop))
+                                        goto out;
+                                /* Note: so_perclient unused for lockowners,
+                                 * so it's OK to fool with here. */
+                                list_add(&sop->so_perclient, &matches);
+                        }
                }
-                /* no locks held by (lock) stateowner */
+        }
-                status = nfs_ok;
+        /* Clients probably won't expect us to return with some (but not all)
-                release_stateowner(local);
+         * of the lockowner state released; so don't release any until all
+         * have been checked. */
+        status = nfs_ok;
+        list_for_each_entry(sop, &matches, so_perclient) {
+                release_stateowner(sop);
        }
 out:
        nfs4_unlock_state();
@@ -3010,39 +3092,38 @@ out:
 }
 static inline struct nfs4_client_reclaim *
-alloc_reclaim(int namelen)
+alloc_reclaim(void)
 {
-        struct nfs4_client_reclaim *crp = NULL;
+        return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
+}
-        crp = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
+int
-        if (!crp)
+nfs4_has_reclaimed_state(const char *name)
-                return NULL;
+{
-        crp->cr_name.data = kmalloc(namelen, GFP_KERNEL);
+        unsigned int strhashval = clientstr_hashval(name);
-        if (!crp->cr_name.data) {
+        struct nfs4_client *clp;
-                kfree(crp);
-                return NULL;
+        clp = find_confirmed_client_by_str(name, strhashval);
-        }
+        return clp ? 1 : 0;
-        return crp;
 }
 /*
 * failure => all reset bets are off, nfserr_no_grace...
 */
-static int
+int
-nfs4_client_to_reclaim(char *name, int namlen)
+nfs4_client_to_reclaim(const char *name)
 {
        unsigned int strhashval;
        struct nfs4_client_reclaim *crp = NULL;
-        dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name);
+        dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
-        crp = alloc_reclaim(namlen);
+        crp = alloc_reclaim();
        if (!crp)
                return 0;
-        strhashval = clientstr_hashval(name, namlen);
+        strhashval = clientstr_hashval(name);
        INIT_LIST_HEAD(&crp->cr_strhash);
        list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
-        memcpy(crp->cr_name.data, name, namlen);
+        memcpy(crp->cr_recdir, name, HEXDIR_LEN);
-        crp->cr_name.len = namlen;
        reclaim_str_hashtbl_size++;
        return 1;
 }
@@ -3053,13 +3134,11 @@ nfs4_release_reclaim(void)
        struct nfs4_client_reclaim *crp = NULL;
        int i;
-        BUG_ON(!nfs4_reclaim_init);
        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
                while (!list_empty(&reclaim_str_hashtbl[i])) {
                        crp = list_entry(reclaim_str_hashtbl[i].next,
                                        struct nfs4_client_reclaim, cr_strhash);
                        list_del(&crp->cr_strhash);
-                        kfree(crp->cr_name.data);
                        kfree(crp);
                        reclaim_str_hashtbl_size--;
                }
@@ -3069,7 +3148,7 @@ nfs4_release_reclaim(void)
 /*
 * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
-struct nfs4_client_reclaim *
+static struct nfs4_client_reclaim *
 nfs4_find_reclaim_client(clientid_t *clid)
 {
        unsigned int strhashval;
@@ -3082,13 +3161,14 @@ nfs4_find_reclaim_client(clientid_t *clid)
        if (clp == NULL)
                return NULL;
-        dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n",
+        dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
-                            clp->cl_name.len, clp->cl_name.data);
+                            clp->cl_name.len, clp->cl_name.data,
+                            clp->cl_recdir);
        /* find clp->cl_name in reclaim_str_hashtbl */
-        strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len);
+        strhashval = clientstr_hashval(clp->cl_recdir);
        list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
-                if (cmp_name(&crp->cr_name, &clp->cl_name)) {
+                if (same_name(crp->cr_recdir, clp->cl_recdir)) {
                        return crp;
                }
        }
@@ -3101,30 +3181,16 @@ nfs4_find_reclaim_client(clientid_t *clid)
 int
 nfs4_check_open_reclaim(clientid_t *clid)
 {
-        struct nfs4_client_reclaim *crp;
+        return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
-        if ((crp = nfs4_find_reclaim_client(clid)) == NULL)
-                return nfserr_reclaim_bad;
-        return nfs_ok;
 }
+/* initialization to perform at module load time: */
-/* 
+void
- * Start and stop routines
+nfs4_state_init(void)
- */
-static void
-__nfs4_state_init(void)
 {
        int i;
-        time_t grace_time;
-        if (!nfs4_reclaim_init) {
-                for (i = 0; i < CLIENT_HASH_SIZE; i++)
-                        INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
-                reclaim_str_hashtbl_size = 0;
-                nfs4_reclaim_init = 1;
-        }
        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
                INIT_LIST_HEAD(&conf_id_hashtbl[i]);
                INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -3146,26 +3212,46 @@ __nfs4_state_init(void)
                INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
                INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
        }
-        memset(&zerostateid, 0, sizeof(stateid_t));
        memset(&onestateid, ~0, sizeof(stateid_t));
        INIT_LIST_HEAD(&close_lru);
        INIT_LIST_HEAD(&client_lru);
        INIT_LIST_HEAD(&del_recall_lru);
-        spin_lock_init(&recall_lock);
+        for (i = 0; i < CLIENT_HASH_SIZE; i++)
+                INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
+        reclaim_str_hashtbl_size = 0;
+}
+static void
+nfsd4_load_reboot_recovery_data(void)
+{
+        int status;
+        nfs4_lock_state();
+        nfsd4_init_recdir(user_recovery_dirname);
+        status = nfsd4_recdir_load();
+        nfs4_unlock_state();
+        if (status)
+                printk("NFSD: Failure reading reboot recovery data\n");
+}
+/* initialization to perform when the nfsd service is started: */
+static void
+__nfs4_state_start(void)
+{
+        time_t grace_time;
        boot_time = get_seconds();
-        grace_time = max(old_lease_time, lease_time);
+        grace_time = max(user_lease_time, lease_time);
-        if (reclaim_str_hashtbl_size == 0)
+        lease_time = user_lease_time;
-                grace_time = 0;
+        in_grace = 1;
-        if (grace_time)
+        printk("NFSD: starting %ld-second grace period\n", grace_time);
-                printk("NFSD: starting %ld-second grace period\n", grace_time);
+        laundry_wq = create_singlethread_workqueue("nfsd4");
-        grace_end = boot_time + grace_time;
+        queue_delayed_work(laundry_wq, &laundromat_work, grace_time*HZ);
-        INIT_WORK(&laundromat_work,laundromat_main, NULL);
-        schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ);
 }
 int
-nfs4_state_init(void)
+nfs4_state_start(void)
 {
        int status;
@@ -3174,7 +3260,8 @@ nfs4_state_init(void)
        status = nfsd4_init_slabs();
        if (status)
                return status;
-        __nfs4_state_init();
+        nfsd4_load_reboot_recovery_data();
+        __nfs4_state_start();
        nfs4_init = 1;
        return 0;
 }
@@ -3182,14 +3269,7 @@ nfs4_state_init(void)
 int
 nfs4_in_grace(void)
 {
-        return get_seconds() < grace_end;
+        return in_grace;
-}
-void
-set_no_grace(void)
-{
-        printk("NFSD: ERROR in reboot recovery.  State reclaims will fail.\n");
-        grace_end = get_seconds();
 }
 time_t
@@ -3236,21 +3316,11 @@ __nfs4_state_shutdown(void)
                unhash_delegation(dp);
        }
-        release_all_files();
        cancel_delayed_work(&laundromat_work);
-        flush_scheduled_work();
+        flush_workqueue(laundry_wq);
+        destroy_workqueue(laundry_wq);
+        nfsd4_shutdown_recdir();
        nfs4_init = 0;
-        dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
-                        list_add_perfile, list_del_perfile);
-        dprintk("NFSD: add_perclient %d del_perclient %d\n",
-                        add_perclient, del_perclient);
-        dprintk("NFSD: alloc_file %d free_file %d\n",
-                        alloc_file, free_file);
-        dprintk("NFSD: vfsopen %d vfsclose %d\n",
-                        vfsopen, vfsclose);
-        dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
-                        alloc_delegation, free_delegation);
 }
 void
@@ -3263,56 +3333,48 @@ nfs4_state_shutdown(void)
        nfs4_unlock_state();
 }
+/*
+ * Record recdir as the recovery directory name: the string is copied
+ * into user_recovery_dirname while the nfs4 state lock is held.
+ *
+ * NOTE(review): strcpy() is unbounded.  This presumably relies on the
+ * caller capping the length (write_recoverydir rejects size > PATH_MAX)
+ * and on user_recovery_dirname being at least that large -- confirm
+ * against its declaration, which is not visible here.
+ */
+static void
+nfs4_set_recdir(char *recdir)
+{
+        nfs4_lock_state();
+        strcpy(user_recovery_dirname, recdir);
+        nfs4_unlock_state();
+}
+/*
+ * Change the NFSv4 recovery directory to recdir.
+ *
+ * The path is resolved with path_lookup(LOOKUP_FOLLOW) and is only
+ * installed (via nfs4_set_recdir) when it names a directory.
+ *
+ * Returns 0 on success, the path_lookup() error if the lookup fails,
+ * or -ENOTDIR if the path exists but is not a directory.  The looked-up
+ * path is released before returning in every case where it was obtained.
+ */
+int
+nfs4_reset_recoverydir(char *recdir)
+{
+        struct nameidata nd;
+        int err;
+        err = path_lookup(recdir, LOOKUP_FOLLOW, &nd);
+        if (err)
+                return err;
+        if (!S_ISDIR(nd.dentry->d_inode->i_mode)) {
+                err = -ENOTDIR;
+                goto out;
+        }
+        /* err is 0 here: the lookup succeeded and the target is a directory */
+        nfs4_set_recdir(recdir);
+out:
+        path_release(&nd);
+        return err;
+}
 /*
 * Called when leasetime is changed.
 *
- * if nfsd is not started, simply set the global lease.
+ * The only way the protocol gives us to handle on-the-fly lease changes is to
- *
+ * simulate a reboot.  Instead of doing that, we just wait till the next time
- * if nfsd(s) are running, lease change requires nfsv4 state to be reset.
+ * we start to register any changes in lease time.  If the administrator
- * e.g: boot_time is reset, existing nfs4_client structs are
+ * really wants to change the lease time *now*, they can go ahead and bring
- * used to fill reclaim_str_hashtbl, then all state (except for the
+ * nfsd down and then back up again after changing the lease time.
- * reclaim_str_hashtbl) is re-initialized.
- *
- * if the old lease time is greater than the new lease time, the grace
- * period needs to be set to the old lease time to allow clients to reclaim
- * their state. XXX - we may want to set the grace period == lease time
- * after an initial grace period == old lease time
- *
- * if an error occurs in this process, the new lease is set, but the server
- * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace
- * which means OPEN/LOCK/READ/WRITE will fail during grace period.
- *
- * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and
- * OPEN and LOCK reclaims.
 */
 void
 nfs4_reset_lease(time_t leasetime)
 {
-        struct nfs4_client *clp;
+        lock_kernel();
-        int i;
+        user_lease_time = leasetime;
+        unlock_kernel();
-        printk("NFSD: New leasetime %ld\n",leasetime);
-        if (!nfs4_init)
-                return;
-        nfs4_lock_state();
-        old_lease_time = lease_time;
-        lease_time = leasetime;
-        nfs4_release_reclaim();
-        /* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
-        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
-                list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
-                        if (!nfs4_client_to_reclaim(clp->cl_name.data,
-                                                clp->cl_name.len)) {
-                                nfs4_release_reclaim();
-                                goto init_state;
-                        }
-                }
-        }
-init_state:
-        __nfs4_state_shutdown();
-        __nfs4_state_init();
-        nfs4_unlock_state();
 }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 36a058a112d5..91fb171d2ace 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -136,7 +136,7 @@ xdr_error:					\
        }                                       \
 } while (0)
-u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
+static u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
 {
        /* We want more bytes than seem to be available.
         * Maybe we need a new page, maybe we have just run out
@@ -190,7 +190,7 @@ defer_free(struct nfsd4_compoundargs *argp,
        return 0;
 }
-char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
+static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
 {
        void *new = NULL;
        if (p == argp->tmp) {
@@ -1366,7 +1366,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
        if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
                if ((buflen -= 4) < 0)
                        goto out_resource;
-                WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME );
+                if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
+                        WRITE32(NFS4_FH_VOLATILE_ANY);
+                else
+                        WRITE32(NFS4_FH_VOLATILE_ANY|NFS4_FH_VOL_RENAME);
        }
        if (bmval0 & FATTR4_WORD0_CHANGE) {
                /*
@@ -1969,7 +1972,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open
        case NFS4_OPEN_DELEGATE_READ:
                RESERVE_SPACE(20 + sizeof(stateid_t));
                WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
-                WRITE32(0);
+                WRITE32(open->op_recall);
                /*
                 * TODO: ACE's in delegations
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 161afdcb8f7d..841c562991e8 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -51,6 +51,7 @@ enum {
        NFSD_Fh,
        NFSD_Threads,
        NFSD_Leasetime,
+        NFSD_RecoveryDir,
 };
 /*
@@ -66,6 +67,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size);
 static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
 static ssize_t write_threads(struct file *file, char *buf, size_t size);
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
 static ssize_t (*write_op[])(struct file *, char *, size_t) = {
        [NFSD_Svc] = write_svc,
@@ -78,6 +80,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
        [NFSD_Fh] = write_filehandle,
        [NFSD_Threads] = write_threads,
        [NFSD_Leasetime] = write_leasetime,
+        [NFSD_RecoveryDir] = write_recoverydir,
 };
 static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
@@ -349,6 +352,25 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
        return strlen(buf);
 }
+/*
+ * Handle a write to the "nfsv4recoverydir" control file.
+ *
+ * buf holds size bytes and must be a single newline-terminated word of
+ * at most PATH_MAX bytes.  The newline is replaced by a NUL, the word is
+ * decoded in place with qword_get(), and the result is handed to
+ * nfs4_reset_recoverydir().
+ *
+ * Returns the length of the decoded string on success, -EINVAL for
+ * malformed input, or the negative error from nfs4_reset_recoverydir()
+ * (e.g. lookup failure or -ENOTDIR) so the writer learns that the
+ * directory was not changed.
+ */
+static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
+{
+        char *mesg = buf;
+        char *recdir;
+        int len, status;
+        /* size == 0 must be rejected first: buf[size-1] would index before buf */
+        if (size == 0 || size > PATH_MAX || buf[size-1] != '\n')
+                return -EINVAL;
+        buf[size-1] = 0;
+        recdir = mesg;
+        len = qword_get(&mesg, recdir, size);
+        if (len <= 0)
+                return -EINVAL;
+        status = nfs4_reset_recoverydir(recdir);
+        /* do not report success when the new directory was refused */
+        if (status)
+                return status;
+        return strlen(buf);
+}
 /*----------------------------------------------------------------------------*/
 /*
 *      populating the filesystem.
@@ -369,6 +391,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
                [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
 #ifdef CONFIG_NFSD_V4
                [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
+                [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
 #endif
                /* last one */ {""}
        };
@@ -397,9 +420,8 @@ static int __init init_nfsd(void)
        nfsd_cache_init();      /* RPC reply cache */
        nfsd_export_init();     /* Exports table */
        nfsd_lockd_init();      /* lockd->nfsd callbacks */
-#ifdef CONFIG_NFSD_V4
+        nfs4_state_init();      /* NFSv4 locking state */
        nfsd_idmap_init();      /* Name to ID mapping */
-#endif /* CONFIG_NFSD_V4 */
        if (proc_mkdir("fs/nfs", NULL)) {
                struct proc_dir_entry *entry;
                entry = create_proc_entry("fs/nfs/exports", 0, NULL);
@@ -426,9 +448,7 @@ static void __exit exit_nfsd(void)
        remove_proc_entry("fs/nfs", NULL);
        nfsd_stat_shutdown();
        nfsd_lockd_shutdown();
-#ifdef CONFIG_NFSD_V4
        nfsd_idmap_shutdown();
-#endif /* CONFIG_NFSD_V4 */
        unregister_filesystem(&nfsd_fs_type);
 }
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 904df604e86b..07b9a065e9da 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -95,7 +95,7 @@ nfsd_svc(unsigned short port, int nrservs)
        error = nfsd_racache_init(2*nrservs);
        if (error<0)
                goto out;
-        error = nfs4_state_init();
+        error = nfs4_state_start();
        if (error<0)
                goto out;
        if (!nfsd_serv) {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ae3940dc85cc..de340ffd33c3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -50,7 +50,6 @@
 #include <linux/posix_acl.h>
 #ifdef CONFIG_NFSD_V4
 #include <linux/posix_acl_xattr.h>
-#include <linux/xattr_acl.h>
 #include <linux/xattr.h>
 #include <linux/nfs4.h>
 #include <linux/nfs4_acl.h>
@@ -425,13 +424,13 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
                goto out_nfserr;
        if (pacl) {
-                error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS);
+                error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
                if (error < 0)
                        goto out_nfserr;
        }
        if (dpacl) {
-                error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT);
+                error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
                if (error < 0)
                        goto out_nfserr;
        }
@@ -498,7 +497,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
        struct posix_acl *pacl = NULL, *dpacl = NULL;
        unsigned int flags = 0;
-        pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS);
+        pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
        if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
                pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
        if (IS_ERR(pacl)) {
@@ -508,7 +507,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
        }
        if (S_ISDIR(inode->i_mode)) {
-                dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT);
+                dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
                if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
                        dpacl = NULL;
                else if (IS_ERR(dpacl)) {
diff --git a/fs/open.c b/fs/open.c
index 963bd81a44c8..3f4a4286fdc4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -21,6 +21,7 @@
 #include <linux/vfs.h>
 #include <asm/uaccess.h>
 #include <linux/fs.h>
+#include <linux/personality.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
@@ -807,7 +808,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
        /* NB: we're sure to have correct a_ops only after f_op->open */
        if (f->f_flags & O_DIRECT) {
-                if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {
+                if (!f->f_mapping->a_ops ||
+                    ((!f->f_mapping->a_ops->direct_IO) &&
+                    (!f->f_mapping->a_ops->get_xip_page))) {
                        fput(f);
                        f = ERR_PTR(-EINVAL);
                }
@@ -933,31 +936,27 @@ EXPORT_SYMBOL(fd_install);
 asmlinkage long sys_open(const char __user * filename, int flags, int mode)
 {
        char * tmp;
-        int fd, error;
+        int fd;
+        if (force_o_largefile())
+                flags |= O_LARGEFILE;
-#if BITS_PER_LONG != 32
-        flags |= O_LARGEFILE;
-#endif
        tmp = getname(filename);
        fd = PTR_ERR(tmp);
        if (!IS_ERR(tmp)) {
                fd = get_unused_fd();
                if (fd >= 0) {
                        struct file *f = filp_open(tmp, flags, mode);
-                        error = PTR_ERR(f);
+                        if (IS_ERR(f)) {
-                        if (IS_ERR(f))
+                                put_unused_fd(fd);
-                                goto out_error;
+                                fd = PTR_ERR(f);
-                        fd_install(fd, f);
+                        } else {
+                                fd_install(fd, f);
+                        }
                }
-out:
                putname(tmp);
        }
        return fd;
-out_error:
-        put_unused_fd(fd);
-        fd = error;
-        goto out;
 }
 EXPORT_SYMBOL_GPL(sys_open);
@@ -980,23 +979,15 @@ asmlinkage long sys_creat(const char __user * pathname, int mode)
 */
 int filp_close(struct file *filp, fl_owner_t id)
 {
-        int retval;
+        int retval = 0;
-        /* Report and clear outstanding errors */
-        retval = filp->f_error;
-        if (retval)
-                filp->f_error = 0;
        if (!file_count(filp)) {
                printk(KERN_ERR "VFS: Close: file count is 0\n");
-                return retval;
+                return 0;
        }
-        if (filp->f_op && filp->f_op->flush) {
+        if (filp->f_op && filp->f_op->flush)
-                int err = filp->f_op->flush(filp);
+                retval = filp->f_op->flush(filp);
-                if (!retval)
-                        retval = err;
-        }
        dnotify_flush(filp, id);
        locks_remove_posix(filp, id);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e31903aadd96..ace151fa4878 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -314,7 +314,7 @@ static int may_ptrace_attach(struct task_struct *task)
             (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
                goto out;
        rmb();
-        if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
+        if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE))
                goto out;
        if (security_ptrace(current, task))
                goto out;
@@ -1113,7 +1113,9 @@ static int task_dumpable(struct task_struct *task)
        if (mm)
                dumpable = mm->dumpable;
        task_unlock(task);
-        return dumpable;
+        if(dumpable == 1)
+                return 1;
+        return 0;
 }
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 63a9fbf1ac51..94b570ad037d 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -451,7 +451,7 @@ static int devices_read_proc(char *page, char **start, off_t off,
                                 int count, int *eof, void *data)
 {
        int len = get_chrdev_list(page);
-        len += get_blkdev_list(page+len);
+        len += get_blkdev_list(page+len, len);
        return proc_calc_metrics(page, start, off, count, eof, len);
 }
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index cd66147cca04..7a8f5595c26f 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -61,7 +61,7 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
                                                ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
                                        else {
                                                le  = (struct qnx4_link_info*)de;
-                                                ino = ( le->dl_inode_blk - 1 ) *
+                                                ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) *
                                                        QNX4_INODES_PER_BLOCK +
                                                        le->dl_inode_ndx;
                                        }
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index aa92d6b76a9a..b79162a35478 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -236,7 +236,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
        struct buffer_head *bh = NULL;
        struct qnx4_xblk *xblk = NULL;
        struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
-        qnx4_nxtnt_t nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
+        u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
        if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) {
                // iblock is in the first extent. This is easy.
@@ -372,7 +372,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
                printk("qnx4: unable to read the superblock\n");
                goto outnobh;
        }
-        if ( le32_to_cpu( *(__u32*)bh->b_data ) != QNX4_SUPER_MAGIC ) {
+        if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) {
                if (!silent)
                        printk("qnx4: wrong fsid in superblock.\n");
                goto out;
diff --git a/fs/quota.c b/fs/quota.c
index 3f0333a51a23..f5d1cff55196 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t
        return error;
 }
-static struct super_block *get_super_to_sync(int type)
-{
-        struct list_head *head;
-        int cnt, dirty;
-restart:
-        spin_lock(&sb_lock);
-        list_for_each(head, &super_blocks) {
-                struct super_block *sb = list_entry(head, struct super_block, s_list);
-                /* This test just improves performance so it needn't be reliable... */
-                for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
-                        if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
-                            && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
-                                dirty = 1;
-                if (!dirty)
-                        continue;
-                sb->s_count++;
-                spin_unlock(&sb_lock);
-                down_read(&sb->s_umount);
-                if (!sb->s_root) {
-                        drop_super(sb);
-                        goto restart;
-                }
-                return sb;
-        }
-        spin_unlock(&sb_lock);
-        return NULL;
-}
 static void quota_sync_sb(struct super_block *sb, int type)
 {
        int cnt;
@@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type)
 void sync_dquots(struct super_block *sb, int type)
 {
+        int cnt, dirty;
        if (sb) {
                if (sb->s_qcop->quota_sync)
                        quota_sync_sb(sb, type);
+                return;
        }
-        else {
-                while ((sb = get_super_to_sync(type)) != NULL) {
+        spin_lock(&sb_lock);
-                        if (sb->s_qcop->quota_sync)
+restart:
-                                quota_sync_sb(sb, type);
+        list_for_each_entry(sb, &super_blocks, s_list) {
-                        drop_super(sb);
+                /* This test just improves performance so it needn't be reliable... */
-                }
+                for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
+                        if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
+                            && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
+                                dirty = 1;
+                if (!dirty)
+                        continue;
+                sb->s_count++;
+                spin_unlock(&sb_lock);
+                down_read(&sb->s_umount);
+                if (sb->s_root && sb->s_qcop->quota_sync)
+                        quota_sync_sb(sb, type);
+                up_read(&sb->s_umount);
+                spin_lock(&sb_lock);
+                if (__put_super_and_need_restart(sb))
+                        goto restart;
        }
+        spin_unlock(&sb_lock);
 }
 /* Copy parameters and call proper function */
diff --git a/fs/read_write.c b/fs/read_write.c
index c4c2bee373ed..9292f5fa4d62 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -203,6 +203,16 @@ Einval:
        return -EINVAL;
 }
+/*
+ * Wait before retrying a synchronous kiocb.
+ *
+ * Called from the do_sync_read()/do_sync_write() retry loops each time
+ * ->aio_read/->aio_write returns -EIOCBRETRY.  If the iocb has already
+ * been kicked, the kicked flag is cleared and we return without
+ * sleeping; otherwise we schedule() in TASK_UNINTERRUPTIBLE.
+ *
+ * The task state is set before the kicked flag is tested -- presumably
+ * so that a kick arriving between the test and schedule() is not lost;
+ * keep that ordering.
+ */
+static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
+{
+        set_current_state(TASK_UNINTERRUPTIBLE);
+        if (kiocbIsKicked(iocb))
+                kiocbClearKicked(iocb);
+        else
+                schedule();
+        __set_current_state(TASK_RUNNING);
+}
 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 {
        struct kiocb kiocb;
@@ -210,7 +220,10 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-        ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos);
+        while (-EIOCBRETRY ==
+                (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
+                wait_on_retry_sync_kiocb(&kiocb);
        if (-EIOCBQUEUED == ret)
                ret = wait_on_sync_kiocb(&kiocb);
        *ppos = kiocb.ki_pos;
@@ -258,7 +271,10 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-        ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos);
+        while (-EIOCBRETRY ==
+               (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
+                wait_on_retry_sync_kiocb(&kiocb);
        if (-EIOCBQUEUED == ret)
                ret = wait_on_sync_kiocb(&kiocb);
        *ppos = kiocb.ki_pos;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 2230afff1870..12e91209544e 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -201,7 +201,7 @@ static int reiserfs_allocate_blocks_for_region(
    /* If we came here, it means we absolutely need to open a transaction,
       since we need to allocate some blocks */
    reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
-    res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); // Wish I know if this number enough
+    res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough
    if (res)
        goto error_exit;
    reiserfs_update_inode_transaction(inode) ;
@@ -576,7 +576,7 @@ error_exit:
        int err;
        // update any changes we made to blk count
        reiserfs_update_sd(th, inode);
-        err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS);
+        err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
        if (err)
            res = err;
    }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2711dff1b7b4..0d5817f81972 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -28,7 +28,7 @@ static int reiserfs_prepare_write(struct file *f, struct page *page,
 void reiserfs_delete_inode (struct inode * inode)
 {
    /* We need blocks for transaction + (user+group) quota update (possibly delete) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
    struct reiserfs_transaction_handle th ;
  
    reiserfs_write_lock(inode->i_sb);
@@ -591,7 +591,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
       XXX in practically impossible worst case direct2indirect()
       can incur (much) more than 3 balancings.
       quota update for user, group */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
    int version;
    int dangle = 1;
    loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
@@ -2796,12 +2796,15 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
                if (!error) {
                    struct reiserfs_transaction_handle th;
+                    int jbegin_count = 2*(REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)+REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb))+2;
                    /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
-                    journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+                    error = journal_begin(&th, inode->i_sb, jbegin_count);
+                    if (error)
+                        goto out;
                    error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
                    if (error) {
-                        journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+                        journal_end(&th, inode->i_sb, jbegin_count);
                        goto out;
                    }
                    /* Update corresponding info in inode so that everything is in
@@ -2811,7 +2814,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
                    if (attr->ia_valid & ATTR_GID)
                        inode->i_gid = attr->ia_gid;
                    mark_inode_dirty(inode);
-                    journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2);
+                    error = journal_end(&th, inode->i_sb, jbegin_count);
                }
        }
        if (!error)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3072cfdee959..7b87707acc36 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2631,6 +2631,8 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct sup
  int retval;
  reiserfs_check_lock_depth(p_s_sb, "journal_begin") ;
+  if (nblocks > journal->j_trans_max)
+        BUG();
  PROC_INFO_INC( p_s_sb, journal.journal_being );
  /* set here for journal_join */
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 7d4dc5f5aa8b..4a333255f27a 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -586,7 +586,7 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode,
    int retval;
    struct inode * inode;
    /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
    struct reiserfs_transaction_handle th ;
    int locked;
@@ -653,7 +653,7 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode,
    struct inode * inode;
    struct reiserfs_transaction_handle th ;
    /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
    int locked;
    if (!new_valid_dev(rdev))
@@ -727,7 +727,7 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
    struct inode * inode;
    struct reiserfs_transaction_handle th ;
    /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
    int locked;
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
@@ -829,8 +829,10 @@ static int reiserfs_rmdir (struct inode * dir, struct dentry *dentry)
    /* we will be doing 2 balancings and update 2 stat data, we change quotas
-     * of the owner of the directory and of the owner of the parent directory */
+     * of the owner of the directory and of the owner of the parent directory.
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+     * The quota structure is possibly deleted only on last iput => outside
+     * of this transaction */
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
    reiserfs_write_lock(dir->i_sb);
    retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
@@ -913,9 +915,10 @@ static int reiserfs_unlink (struct inode * dir, struct dentry *dentry)
    inode = dentry->d_inode;
    /* in this transaction we can be doing at max two balancings and update
-       two stat datas, we change quotas of the owner of the directory and of
+     * two stat datas, we change quotas of the owner of the directory and of
-       the owner of the parent directory */
+     * the owner of the parent directory. The quota structure is possibly
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+     * deleted only on iput => outside of this transaction */
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
    reiserfs_write_lock(dir->i_sb);
    retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
@@ -1000,7 +1003,7 @@ static int reiserfs_symlink (struct inode * parent_dir,
    struct reiserfs_transaction_handle th ;
    int mode = S_IFLNK | S_IRWXUGO;
    /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS);
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
    if (!(inode = new_inode(parent_dir->i_sb))) {
        return -ENOMEM ;
@@ -1076,7 +1079,7 @@ static int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct
    struct inode *inode = old_dentry->d_inode;
    struct reiserfs_transaction_handle th ;
    /* We need blocks for transaction + update of quotas for the owners of the directory */
-    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS;
+    int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
    reiserfs_write_lock(dir->i_sb);
    if (inode->i_nlink >= REISERFS_LINK_MAX) {
@@ -1196,7 +1199,7 @@ static int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry,
       pointed initially and (5) maybe block containing ".." of
       renamed directory
       quota updates: two parent directories */
-    jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS;
+    jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
    old_inode = old_dentry->d_inode;
    new_dentry_inode = new_dentry->d_inode;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index c47f8fd31a2d..63158491e152 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -223,7 +223,7 @@ extern struct tree_balance * cur_tb;
 const struct reiserfs_key  MIN_KEY = {0, 0, {{0, 0},}};
 /* Maximal possible key. It is never in the tree. */
-const struct reiserfs_key  MAX_KEY = {
+static const struct reiserfs_key  MAX_KEY = {
        __constant_cpu_to_le32(0xffffffff),
        __constant_cpu_to_le32(0xffffffff),
        {{__constant_cpu_to_le32(0xffffffff),
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b35b87744983..660aefca1fd2 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -866,8 +866,9 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
        {"jdev",        .arg_required = 'j', .values = NULL},
        {"nolargeio",   .arg_required = 'w', .values = NULL},
        {"commit",      .arg_required = 'c', .values = NULL},
-        {"usrquota",},
+        {"usrquota",    .setmask = 1<<REISERFS_QUOTA},
-        {"grpquota",},
+        {"grpquota",    .setmask = 1<<REISERFS_QUOTA},
+        {"noquota",     .clrmask = 1<<REISERFS_QUOTA},
        {"errors",      .arg_required = 'e', .values = error_actions},
        {"usrjquota",   .arg_required = 'u'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL},
        {"grpjquota",   .arg_required = 'g'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL},
@@ -964,6 +965,7 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
                    return 0;
                }
                strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
+                *mount_options |= 1<<REISERFS_QUOTA;
            }
            else {
                if (REISERFS_SB(s)->s_qf_names[qtype]) {
@@ -995,7 +997,13 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
        reiserfs_warning(s, "reiserfs_parse_options: journalled quota format not specified.");
        return 0;
    }
+    /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
+    if (!(*mount_options & (1<<REISERFS_QUOTA)) && sb_any_quota_enabled(s)) {
+        reiserfs_warning(s, "reiserfs_parse_options: quota options must be present when quota is turned on.");
+        return 0;
+    }
 #endif
    return 1;
 }
@@ -1105,6 +1113,7 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a
  safe_mask |= 1 << REISERFS_ERROR_RO;
  safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
  safe_mask |= 1 << REISERFS_ERROR_PANIC;
+  safe_mask |= 1 << REISERFS_QUOTA;
  /* Update the bitmask, taking care to keep
   * the bits we're not allowed to change here */
@@ -1841,13 +1850,18 @@ static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf)
 static int reiserfs_dquot_initialize(struct inode *inode, int type)
 {
    struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
    /* We may create quota structure so we need to reserve enough blocks */
    reiserfs_write_lock(inode->i_sb);
-    journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
+    if (ret)
+        goto out;
    ret = dquot_initialize(inode, type);
-    journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
+    if (!ret && err)
+        ret = err;
+out:
    reiserfs_write_unlock(inode->i_sb);
    return ret;
 }
@@ -1855,13 +1869,18 @@ static int reiserfs_dquot_initialize(struct inode *inode, int type)
 static int reiserfs_dquot_drop(struct inode *inode)
 {
    struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
    /* We may delete quota structure so we need to reserve enough blocks */
    reiserfs_write_lock(inode->i_sb);
-    journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
+    if (ret)
+        goto out;
    ret = dquot_drop(inode);
-    journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
+    if (!ret && err)
+        ret = err;
+out:
    reiserfs_write_unlock(inode->i_sb);
    return ret;
 }
@@ -1869,12 +1888,17 @@ static int reiserfs_dquot_drop(struct inode *inode)
 static int reiserfs_write_dquot(struct dquot *dquot)
 {
    struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
    reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+    if (ret)
+        goto out;
    ret = dquot_commit(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
+    if (!ret && err)
+        ret = err;
+out:
    reiserfs_write_unlock(dquot->dq_sb);
    return ret;
 }
@@ -1882,12 +1906,17 @@ static int reiserfs_write_dquot(struct dquot *dquot)
 static int reiserfs_acquire_dquot(struct dquot *dquot)
 {
    struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
    reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+    if (ret)
+        goto out;
    ret = dquot_acquire(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
+    if (!ret && err)
+        ret = err;
+out:
    reiserfs_write_unlock(dquot->dq_sb);
    return ret;
 }
@@ -1895,12 +1924,17 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
 static int reiserfs_release_dquot(struct dquot *dquot)
 {
    struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
    reiserfs_write_lock(dquot->dq_sb);
-    journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+    if (ret)
+        goto out;
    ret = dquot_release(dquot);
-    journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS);
+    err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+    if (!ret && err)
+        ret = err;
+out:
    reiserfs_write_unlock(dquot->dq_sb);
    return ret;
 }
@@ -1920,39 +1954,29 @@ static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
 static int reiserfs_write_info(struct super_block *sb, int type)
 {
    struct reiserfs_transaction_handle th;
-    int ret;
+    int ret, err;
    /* Data block + inode block */
    reiserfs_write_lock(sb);
-    journal_begin(&th, sb, 2);
+    ret = journal_begin(&th, sb, 2);
+    if (ret)
+        goto out;
    ret = dquot_commit_info(sb, type);
-    journal_end(&th, sb, 2);
+    err = journal_end(&th, sb, 2);
+    if (!ret && err)
+        ret = err;
+out:
    reiserfs_write_unlock(sb);
    return ret;
 }
 /*
- * Turn on quotas during mount time - we need to find
+ * Turn on quotas during mount time - we need to find the quota file and such...
- * the quota file and such...
 */
 static int reiserfs_quota_on_mount(struct super_block *sb, int type)
 {
-    int err;
+        return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
-    struct dentry *dentry;
+                        REISERFS_SB(sb)->s_jquota_fmt, type);
-    struct qstr name = { .name = REISERFS_SB(sb)->s_qf_names[type],
-                         .hash = 0,
-                         .len = strlen(REISERFS_SB(sb)->s_qf_names[type])};
-    dentry = lookup_hash(&name, sb->s_root);
-    if (IS_ERR(dentry))
-            return PTR_ERR(dentry);
-    err = vfs_quota_on_mount(type, REISERFS_SB(sb)->s_jquota_fmt, dentry);
-    /* Now invalidate and put the dentry - quota got its own reference
-     * to inode and dentry has at least wrong hash so we had better
-     * throw it away */
-    d_invalidate(dentry);
-    dput(dentry);
-    return err;
 }
 /*
@@ -1963,6 +1987,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, ch
    int err;
    struct nameidata nd;
+    if (!(REISERFS_SB(sb)->s_mount_opt & (1<<REISERFS_QUOTA)))
+        return -EINVAL;
    err = path_lookup(path, LOOKUP_FOLLOW, &nd);
    if (err)
        return err;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index e302071903a1..c312881c5f53 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -4,7 +4,7 @@
 #include <linux/errno.h>
 #include <linux/pagemap.h>
 #include <linux/xattr.h>
-#include <linux/xattr_acl.h>
+#include <linux/posix_acl_xattr.h>
 #include <linux/reiserfs_xattr.h>
 #include <linux/reiserfs_acl.h>
 #include <asm/uaccess.h>
@@ -192,11 +192,11 @@ reiserfs_get_acl(struct inode *inode, int type)
        switch (type) {
            case ACL_TYPE_ACCESS:
-                name = XATTR_NAME_ACL_ACCESS;
+                name = POSIX_ACL_XATTR_ACCESS;
                p_acl = &reiserfs_i->i_acl_access;
                break;
            case ACL_TYPE_DEFAULT:
-                name = XATTR_NAME_ACL_DEFAULT;
+                name = POSIX_ACL_XATTR_DEFAULT;
                p_acl = &reiserfs_i->i_acl_default;
                break;
            default:
@@ -260,7 +260,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
        switch (type) {
            case ACL_TYPE_ACCESS:
-                name = XATTR_NAME_ACL_ACCESS;
+                name = POSIX_ACL_XATTR_ACCESS;
                p_acl = &reiserfs_i->i_acl_access;
                if (acl) {
                    mode_t mode = inode->i_mode;
@@ -275,7 +275,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                }
                break;
            case ACL_TYPE_DEFAULT:
-                name = XATTR_NAME_ACL_DEFAULT;
+                name = POSIX_ACL_XATTR_DEFAULT;
                p_acl = &reiserfs_i->i_acl_default;
                if (!S_ISDIR (inode->i_mode))
                    return acl ? -EACCES : 0;
@@ -468,7 +468,7 @@ static int
 posix_acl_access_get(struct inode *inode, const char *name,
                          void *buffer, size_t size)
 {
-        if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+        if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
                return -EINVAL;
        return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
 }
@@ -477,7 +477,7 @@ static int
 posix_acl_access_set(struct inode *inode, const char *name,
                          const void *value, size_t size, int flags)
 {
-        if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+        if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
                return -EINVAL;
        return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
 }
@@ -487,7 +487,7 @@ posix_acl_access_del (struct inode *inode, const char *name)
 {
    struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
    struct posix_acl **acl = &reiserfs_i->i_acl_access;
-    if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1)
+    if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
        return -EINVAL;
    if (!IS_ERR (*acl) && *acl) {
        posix_acl_release (*acl);
@@ -510,7 +510,7 @@ posix_acl_access_list (struct inode *inode, const char *name, int namelen, char
 }
 struct reiserfs_xattr_handler posix_acl_access_handler = {
-        .prefix = XATTR_NAME_ACL_ACCESS,
+        .prefix = POSIX_ACL_XATTR_ACCESS,
        .get = posix_acl_access_get,
        .set = posix_acl_access_set,
        .del = posix_acl_access_del,
@@ -521,7 +521,7 @@ static int
 posix_acl_default_get (struct inode *inode, const char *name,
                           void *buffer, size_t size)
 {
-        if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+        if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
                return -EINVAL;
        return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
 }
@@ -530,7 +530,7 @@ static int
 posix_acl_default_set(struct inode *inode, const char *name,
                           const void *value, size_t size, int flags)
 {
-        if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+        if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
                return -EINVAL;
        return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
 }
@@ -540,7 +540,7 @@ posix_acl_default_del (struct inode *inode, const char *name)
 {
    struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
    struct posix_acl **acl = &reiserfs_i->i_acl_default;
-    if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1)
+    if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
        return -EINVAL;
    if (!IS_ERR (*acl) && *acl) {
        posix_acl_release (*acl);
@@ -563,7 +563,7 @@ posix_acl_default_list (struct inode *inode, const char *name, int namelen, char
 }
 struct reiserfs_xattr_handler posix_acl_default_handler = {
-        .prefix = XATTR_NAME_ACL_DEFAULT,
+        .prefix = POSIX_ACL_XATTR_DEFAULT,
        .get = posix_acl_default_get,
        .set = posix_acl_default_set,
        .del = posix_acl_default_del,
diff --git a/fs/super.c b/fs/super.c
index 573bcc81bb82..25bc1ec6bc5d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -341,20 +341,22 @@ static inline void write_super(struct super_block *sb)
 */
 void sync_supers(void)
 {
-        struct super_block * sb;
+        struct super_block *sb;
-restart:
        spin_lock(&sb_lock);
-        sb = sb_entry(super_blocks.next);
+restart:
-        while (sb != sb_entry(&super_blocks))
+        list_for_each_entry(sb, &super_blocks, s_list) {
                if (sb->s_dirt) {
                        sb->s_count++;
                        spin_unlock(&sb_lock);
                        down_read(&sb->s_umount);
                        write_super(sb);
-                        drop_super(sb);
+                        up_read(&sb->s_umount);
-                        goto restart;
+                        spin_lock(&sb_lock);
-                } else
+                        if (__put_super_and_need_restart(sb))
-                        sb = sb_entry(sb->s_list.next);
+                                goto restart;
+                }
+        }
        spin_unlock(&sb_lock);
 }
@@ -381,20 +383,16 @@ void sync_filesystems(int wait)
        down(&mutex);           /* Could be down_interruptible */
        spin_lock(&sb_lock);
-        for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
+        list_for_each_entry(sb, &super_blocks, s_list) {
-                        sb = sb_entry(sb->s_list.next)) {
                if (!sb->s_op->sync_fs)
                        continue;
                if (sb->s_flags & MS_RDONLY)
                        continue;
                sb->s_need_sync_fs = 1;
        }
-        spin_unlock(&sb_lock);
 restart:
-        spin_lock(&sb_lock);
+        list_for_each_entry(sb, &super_blocks, s_list) {
-        for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
-                        sb = sb_entry(sb->s_list.next)) {
                if (!sb->s_need_sync_fs)
                        continue;
                sb->s_need_sync_fs = 0;
@@ -405,8 +403,11 @@ restart:
                down_read(&sb->s_umount);
                if (sb->s_root && (wait || sb->s_dirt))
                        sb->s_op->sync_fs(sb, wait);
-                drop_super(sb);
+                up_read(&sb->s_umount);
-                goto restart;
+                /* restart only when sb is no longer on the list */
+                spin_lock(&sb_lock);
+                if (__put_super_and_need_restart(sb))
+                        goto restart;
        }
        spin_unlock(&sb_lock);
        up(&mutex);
@@ -422,21 +423,25 @@ restart:
 struct super_block * get_super(struct block_device *bdev)
 {
-        struct list_head *p;
+        struct super_block *sb;
        if (!bdev)
                return NULL;
-rescan:
        spin_lock(&sb_lock);
-        list_for_each(p, &super_blocks) {
+rescan:
-                struct super_block *s = sb_entry(p);
+        list_for_each_entry(sb, &super_blocks, s_list) {
-                if (s->s_bdev == bdev) {
+                if (sb->s_bdev == bdev) {
-                        s->s_count++;
+                        sb->s_count++;
                        spin_unlock(&sb_lock);
-                        down_read(&s->s_umount);
+                        down_read(&sb->s_umount);
-                        if (s->s_root)
+                        if (sb->s_root)
-                                return s;
+                                return sb;
-                        drop_super(s);
+                        up_read(&sb->s_umount);
-                        goto rescan;
+                        /* restart only when sb is no longer on the list */
+                        spin_lock(&sb_lock);
+                        if (__put_super_and_need_restart(sb))
+                                goto rescan;
                }
        }
        spin_unlock(&sb_lock);
@@ -447,20 +452,22 @@ EXPORT_SYMBOL(get_super);
 
 struct super_block * user_get_super(dev_t dev)
 {
-        struct list_head *p;
+        struct super_block *sb;
-rescan:
        spin_lock(&sb_lock);
-        list_for_each(p, &super_blocks) {
+rescan:
-                struct super_block *s = sb_entry(p);
+        list_for_each_entry(sb, &super_blocks, s_list) {
-                if (s->s_dev ==  dev) {
+                if (sb->s_dev ==  dev) {
-                        s->s_count++;
+                        sb->s_count++;
                        spin_unlock(&sb_lock);
-                        down_read(&s->s_umount);
+                        down_read(&sb->s_umount);
-                        if (s->s_root)
+                        if (sb->s_root)
-                                return s;
+                                return sb;
-                        drop_super(s);
+                        up_read(&sb->s_umount);
-                        goto rescan;
+                        /* restart only when sb is no longer on the list */
+                        spin_lock(&sb_lock);
+                        if (__put_super_and_need_restart(sb))
+                                goto rescan;
                }
        }
        spin_unlock(&sb_lock);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 37d7a6875d86..59734ba1ee60 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -8,6 +8,7 @@
 #include <linux/mount.h>
 #include <linux/module.h>
 #include <linux/kobject.h>
+#include <linux/namei.h>
 #include "sysfs.h"
 DECLARE_RWSEM(sysfs_rename_sem);
@@ -99,7 +100,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
        umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
        down(&p->d_inode->i_sem);
-        *d = sysfs_get_dentry(p,n);
+        *d = lookup_one_len(n, p, strlen(n));
        if (!IS_ERR(*d)) {
                error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
                if (!error) {
@@ -315,7 +316,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
        down(&parent->d_inode->i_sem);
-        new_dentry = sysfs_get_dentry(parent, new_name);
+        new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
        if (!IS_ERR(new_dentry)) {
                if (!new_dentry->d_inode) {
                        error = kobject_set_name(kobj, "%s", new_name);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 849aac115460..d72c1ce48559 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -5,6 +5,7 @@
 #include <linux/module.h>
 #include <linux/dnotify.h>
 #include <linux/kobject.h>
+#include <linux/namei.h>
 #include <asm/uaccess.h>
 #include <asm/semaphore.h>
@@ -13,7 +14,7 @@
 #define to_subsys(k) container_of(k,struct subsystem,kset.kobj)
 #define to_sattr(a) container_of(a,struct subsys_attribute,attr)
-/**
+/*
 * Subsystem file operations.
 * These operations allow subsystems to have files that can be 
 * read/written. 
@@ -191,8 +192,9 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
 /**
 *      flush_write_buffer - push buffer to kobject.
- *      @file:          file pointer.
+ *      @dentry:        dentry to the attribute
 *      @buffer:        data buffer for file.
+ *      @count:         number of bytes
 *
 *      Get the correct pointers for the kobject and the attribute we're
 *      dealing with, then call the store() method for the attribute, 
@@ -400,7 +402,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
        int res = -ENOENT;
        down(&dir->d_inode->i_sem);
-        victim = sysfs_get_dentry(dir, attr->name);
+        victim = lookup_one_len(attr->name, dir, strlen(attr->name));
        if (!IS_ERR(victim)) {
                /* make sure dentry is really there */
                if (victim->d_inode && 
@@ -443,7 +445,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
        int res = -ENOENT;
        down(&dir->d_inode->i_sem);
-        victim = sysfs_get_dentry(dir, attr->name);
+        victim = lookup_one_len(attr->name, dir, strlen(attr->name));
        if (!IS_ERR(victim)) {
                if (victim->d_inode &&
                    (victim->d_parent->d_inode == dir->d_inode)) {
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index f11ac5ea7021..122145b0895c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -11,6 +11,7 @@
 #include <linux/kobject.h>
 #include <linux/module.h>
 #include <linux/dcache.h>
+#include <linux/namei.h>
 #include <linux/err.h>
 #include "sysfs.h"
@@ -68,7 +69,8 @@ void sysfs_remove_group(struct kobject * kobj,
        struct dentry * dir;
        if (grp->name)
-                dir = sysfs_get_dentry(kobj->dentry,grp->name);
+                dir = lookup_one_len(grp->name, kobj->dentry,
+                                strlen(grp->name));
        else
                dir = dget(kobj->dentry);
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 565cac1d4200..8de13bafaa76 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -166,16 +166,6 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
        return error;
 }
-struct dentry * sysfs_get_dentry(struct dentry * parent, const char * name)
-{
-        struct qstr qstr;
-        qstr.name = name;
-        qstr.len = strlen(name);
-        qstr.hash = full_name_hash(name,qstr.len);
-        return lookup_hash(&qstr,parent);
-}
 /*
 * Get the name for corresponding element represented by the given sysfs_dirent
 */
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 29da6f5f07c8..3f8953e0e5d0 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -7,7 +7,6 @@ extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
 extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
                                umode_t, int);
-extern struct dentry * sysfs_get_dentry(struct dentry *, const char *);
 extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
 extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 93ce257cd149..a3a4b5aaf5d9 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -149,11 +149,12 @@ linvfs_unwritten_convert(
 */
 STATIC void
 linvfs_unwritten_convert_direct(
-        struct inode    *inode,
+        struct kiocb    *iocb,
        loff_t          offset,
        ssize_t         size,
        void            *private)
 {
+        struct inode    *inode = iocb->ki_filp->f_dentry->d_inode;
        ASSERT(!private || inode == (struct inode *)private);
        /* private indicates an unwritten extent lay beneath this IO */
author	Anton Altaparmakov <aia21@cantab.net>	2005-06-25 09:27:27 -0400
committer	Anton Altaparmakov <aia21@cantab.net>	2005-06-25 09:27:27 -0400
commit	38b22b6e9f46ab8f73ef5734f0e0a000766a9258 (patch)
tree	2ccc41ef55918d3af43e444bde7648562a031559 /fs
parent	3357d4c75f1fb67e7304998c4ad4e9a9fed66fa4 (diff)
parent	b3e112bcc19abd8e9657dca34a87316786e096f3 (diff)