aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig19
-rw-r--r--fs/block_dev.c5
-rw-r--r--fs/buffer.c57
-rw-r--r--fs/char_dev.c13
-rw-r--r--fs/debugfs/inode.c12
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/dquot.c16
-rw-r--r--fs/eventpoll.c195
-rw-r--r--fs/exec.c23
-rw-r--r--fs/ext2/Makefile1
-rw-r--r--fs/ext2/acl.c12
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/ext2.h2
-rw-r--r--fs/ext2/file.c14
-rw-r--r--fs/ext2/inode.c31
-rw-r--r--fs/ext2/namei.c12
-rw-r--r--fs/ext2/super.c27
-rw-r--r--fs/ext2/xip.c80
-rw-r--r--fs/ext2/xip.h25
-rw-r--r--fs/ext3/acl.c17
-rw-r--r--fs/ext3/acl.h2
-rw-r--r--fs/ext3/inode.c7
-rw-r--r--fs/ext3/namei.c37
-rw-r--r--fs/ext3/super.c55
-rw-r--r--fs/ext3/xattr.c2
-rw-r--r--fs/file_table.c57
-rw-r--r--fs/fs-writeback.c64
-rw-r--r--fs/inode.c16
-rw-r--r--fs/jfs/acl.c11
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/super.c1
-rw-r--r--fs/jfs/xattr.c7
-rw-r--r--fs/lockd/svc.c4
-rw-r--r--fs/namei.c20
-rw-r--r--fs/namespace.c2
-rw-r--r--fs/nfs/direct.c5
-rw-r--r--fs/nfsd/Makefile2
-rw-r--r--fs/nfsd/nfs4acl.c4
-rw-r--r--fs/nfsd/nfs4callback.c13
-rw-r--r--fs/nfsd/nfs4idmap.c12
-rw-r--r--fs/nfsd/nfs4proc.c26
-rw-r--r--fs/nfsd/nfs4recover.c431
-rw-r--r--fs/nfsd/nfs4state.c1028
-rw-r--r--fs/nfsd/nfs4xdr.c11
-rw-r--r--fs/nfsd/nfsctl.c28
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/vfs.c9
-rw-r--r--fs/open.c45
-rw-r--r--fs/proc/base.c6
-rw-r--r--fs/proc/proc_misc.c2
-rw-r--r--fs/qnx4/dir.c2
-rw-r--r--fs/qnx4/inode.c4
-rw-r--r--fs/quota.c60
-rw-r--r--fs/read_write.c20
-rw-r--r--fs/reiserfs/file.c4
-rw-r--r--fs/reiserfs/inode.c13
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/reiserfs/namei.c25
-rw-r--r--fs/reiserfs/stree.c2
-rw-r--r--fs/reiserfs/super.c102
-rw-r--r--fs/reiserfs/xattr_acl.c26
-rw-r--r--fs/super.c83
-rw-r--r--fs/sysfs/dir.c5
-rw-r--r--fs/sysfs/file.c10
-rw-r--r--fs/sysfs/group.c4
-rw-r--r--fs/sysfs/inode.c10
-rw-r--r--fs/sysfs/sysfs.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c3
68 files changed, 1833 insertions, 1019 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index a7c0cc3203cb..8157f2e2d515 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -50,6 +50,23 @@ config EXT2_FS_SECURITY
50 If you are not using a security module that requires using 50 If you are not using a security module that requires using
51 extended attributes for file security labels, say N. 51 extended attributes for file security labels, say N.
52 52
53config EXT2_FS_XIP
54 bool "Ext2 execute in place support"
55 depends on EXT2_FS
56 help
57 Execute in place can be used on memory-backed block devices. If you
58 enable this option, you can select to mount block devices which are
59 capable of this feature without using the page cache.
60
61 If you do not use a block device that is capable of using this,
62 or if unsure, say N.
63
64config FS_XIP
65# execute in place
66 bool
67 depends on EXT2_FS_XIP
68 default y
69
53config EXT3_FS 70config EXT3_FS
54 tristate "Ext3 journalling file system support" 71 tristate "Ext3 journalling file system support"
55 help 72 help
@@ -1413,6 +1430,8 @@ config NFSD_V4
1413 bool "Provide NFSv4 server support (EXPERIMENTAL)" 1430 bool "Provide NFSv4 server support (EXPERIMENTAL)"
1414 depends on NFSD_V3 && EXPERIMENTAL 1431 depends on NFSD_V3 && EXPERIMENTAL
1415 select NFSD_TCP 1432 select NFSD_TCP
1433 select CRYPTO_MD5
1434 select CRYPTO
1416 help 1435 help
1417 If you would like to include the NFSv4 server as well as the NFSv2 1436 If you would like to include the NFSv4 server as well as the NFSv2
1418 and NFSv3 servers, say Y here. This feature is experimental, and 1437 and NFSv3 servers, say Y here. This feature is experimental, and
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c0cbd1bc1a02..e0df94c37b7e 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -777,8 +777,7 @@ static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
777 return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); 777 return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
778} 778}
779 779
780static int block_ioctl(struct inode *inode, struct file *file, unsigned cmd, 780static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
781 unsigned long arg)
782{ 781{
783 return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); 782 return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
784} 783}
@@ -803,7 +802,7 @@ struct file_operations def_blk_fops = {
803 .aio_write = blkdev_file_aio_write, 802 .aio_write = blkdev_file_aio_write,
804 .mmap = generic_file_mmap, 803 .mmap = generic_file_mmap,
805 .fsync = block_fsync, 804 .fsync = block_fsync,
806 .ioctl = block_ioctl, 805 .unlocked_ioctl = block_ioctl,
807#ifdef CONFIG_COMPAT 806#ifdef CONFIG_COMPAT
808 .compat_ioctl = compat_blkdev_ioctl, 807 .compat_ioctl = compat_blkdev_ioctl,
809#endif 808#endif
diff --git a/fs/buffer.c b/fs/buffer.c
index 0befa724ab98..13e5938a64f6 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -331,7 +331,7 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
331 return ret; 331 return ret;
332} 332}
333 333
334asmlinkage long sys_fsync(unsigned int fd) 334static long do_fsync(unsigned int fd, int datasync)
335{ 335{
336 struct file * file; 336 struct file * file;
337 struct address_space *mapping; 337 struct address_space *mapping;
@@ -342,14 +342,14 @@ asmlinkage long sys_fsync(unsigned int fd)
342 if (!file) 342 if (!file)
343 goto out; 343 goto out;
344 344
345 mapping = file->f_mapping;
346
347 ret = -EINVAL; 345 ret = -EINVAL;
348 if (!file->f_op || !file->f_op->fsync) { 346 if (!file->f_op || !file->f_op->fsync) {
349 /* Why? We can still call filemap_fdatawrite */ 347 /* Why? We can still call filemap_fdatawrite */
350 goto out_putf; 348 goto out_putf;
351 } 349 }
352 350
351 mapping = file->f_mapping;
352
353 current->flags |= PF_SYNCWRITE; 353 current->flags |= PF_SYNCWRITE;
354 ret = filemap_fdatawrite(mapping); 354 ret = filemap_fdatawrite(mapping);
355 355
@@ -358,7 +358,7 @@ asmlinkage long sys_fsync(unsigned int fd)
358 * which could cause livelocks in fsync_buffers_list 358 * which could cause livelocks in fsync_buffers_list
359 */ 359 */
360 down(&mapping->host->i_sem); 360 down(&mapping->host->i_sem);
361 err = file->f_op->fsync(file, file->f_dentry, 0); 361 err = file->f_op->fsync(file, file->f_dentry, datasync);
362 if (!ret) 362 if (!ret)
363 ret = err; 363 ret = err;
364 up(&mapping->host->i_sem); 364 up(&mapping->host->i_sem);
@@ -373,39 +373,14 @@ out:
373 return ret; 373 return ret;
374} 374}
375 375
376asmlinkage long sys_fdatasync(unsigned int fd) 376asmlinkage long sys_fsync(unsigned int fd)
377{ 377{
378 struct file * file; 378 return do_fsync(fd, 0);
379 struct address_space *mapping; 379}
380 int ret, err;
381
382 ret = -EBADF;
383 file = fget(fd);
384 if (!file)
385 goto out;
386
387 ret = -EINVAL;
388 if (!file->f_op || !file->f_op->fsync)
389 goto out_putf;
390
391 mapping = file->f_mapping;
392
393 current->flags |= PF_SYNCWRITE;
394 ret = filemap_fdatawrite(mapping);
395 down(&mapping->host->i_sem);
396 err = file->f_op->fsync(file, file->f_dentry, 1);
397 if (!ret)
398 ret = err;
399 up(&mapping->host->i_sem);
400 err = filemap_fdatawait(mapping);
401 if (!ret)
402 ret = err;
403 current->flags &= ~PF_SYNCWRITE;
404 380
405out_putf: 381asmlinkage long sys_fdatasync(unsigned int fd)
406 fput(file); 382{
407out: 383 return do_fsync(fd, 1);
408 return ret;
409} 384}
410 385
411/* 386/*
@@ -1951,7 +1926,6 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1951 if (err) 1926 if (err)
1952 break; 1927 break;
1953 if (buffer_new(bh)) { 1928 if (buffer_new(bh)) {
1954 clear_buffer_new(bh);
1955 unmap_underlying_metadata(bh->b_bdev, 1929 unmap_underlying_metadata(bh->b_bdev,
1956 bh->b_blocknr); 1930 bh->b_blocknr);
1957 if (PageUptodate(page)) { 1931 if (PageUptodate(page)) {
@@ -1993,9 +1967,14 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
1993 if (!buffer_uptodate(*wait_bh)) 1967 if (!buffer_uptodate(*wait_bh))
1994 err = -EIO; 1968 err = -EIO;
1995 } 1969 }
1996 if (!err) 1970 if (!err) {
1997 return err; 1971 bh = head;
1998 1972 do {
1973 if (buffer_new(bh))
1974 clear_buffer_new(bh);
1975 } while ((bh = bh->b_this_page) != head);
1976 return 0;
1977 }
1999 /* Error case: */ 1978 /* Error case: */
2000 /* 1979 /*
2001 * Zero out any newly allocated blocks to avoid exposing stale 1980 * Zero out any newly allocated blocks to avoid exposing stale
diff --git a/fs/char_dev.c b/fs/char_dev.c
index c1e3537909fc..e82aac9cc2f5 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -56,10 +56,21 @@ int get_chrdev_list(char *page)
56 56
57 down(&chrdevs_lock); 57 down(&chrdevs_lock);
58 for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) { 58 for (i = 0; i < ARRAY_SIZE(chrdevs) ; i++) {
59 for (cd = chrdevs[i]; cd; cd = cd->next) 59 for (cd = chrdevs[i]; cd; cd = cd->next) {
60 /*
61 * if the current name, plus the 5 extra characters
62 * in the device line for this entry
63 * would run us off the page, we're done
64 */
65 if ((len+strlen(cd->name) + 5) >= PAGE_SIZE)
66 goto page_full;
67
68
60 len += sprintf(page+len, "%3d %s\n", 69 len += sprintf(page+len, "%3d %s\n",
61 cd->major, cd->name); 70 cd->major, cd->name);
71 }
62 } 72 }
73page_full:
63 up(&chrdevs_lock); 74 up(&chrdevs_lock);
64 75
65 return len; 76 return len;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b529786699e7..a86ac4aeaedb 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -110,16 +110,6 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
110 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); 110 return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
111} 111}
112 112
113static struct dentry * get_dentry(struct dentry *parent, const char *name)
114{
115 struct qstr qstr;
116
117 qstr.name = name;
118 qstr.len = strlen(name);
119 qstr.hash = full_name_hash(name,qstr.len);
120 return lookup_hash(&qstr,parent);
121}
122
123static struct super_block *debug_get_sb(struct file_system_type *fs_type, 113static struct super_block *debug_get_sb(struct file_system_type *fs_type,
124 int flags, const char *dev_name, 114 int flags, const char *dev_name,
125 void *data) 115 void *data)
@@ -157,7 +147,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
157 147
158 *dentry = NULL; 148 *dentry = NULL;
159 down(&parent->d_inode->i_sem); 149 down(&parent->d_inode->i_sem);
160 *dentry = get_dentry (parent, name); 150 *dentry = lookup_one_len(name, parent, strlen(name));
161 if (!IS_ERR(dentry)) { 151 if (!IS_ERR(dentry)) {
162 if ((mode & S_IFMT) == S_IFDIR) 152 if ((mode & S_IFMT) == S_IFDIR)
163 error = debugfs_mkdir(parent->d_inode, *dentry, mode); 153 error = debugfs_mkdir(parent->d_inode, *dentry, mode);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1d55e7e67342..0d06097bc995 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -215,7 +215,7 @@ static struct page *dio_get_page(struct dio *dio)
215static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes) 215static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
216{ 216{
217 if (dio->end_io && dio->result) 217 if (dio->end_io && dio->result)
218 dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private); 218 dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
219 if (dio->lock_type == DIO_LOCKING) 219 if (dio->lock_type == DIO_LOCKING)
220 up_read(&dio->inode->i_alloc_sem); 220 up_read(&dio->inode->i_alloc_sem);
221} 221}
diff --git a/fs/dquot.c b/fs/dquot.c
index 3995ce7907cc..37212b039a4a 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1519,14 +1519,22 @@ out_path:
1519 * This function is used when filesystem needs to initialize quotas 1519 * This function is used when filesystem needs to initialize quotas
1520 * during mount time. 1520 * during mount time.
1521 */ 1521 */
1522int vfs_quota_on_mount(int type, int format_id, struct dentry *dentry) 1522int vfs_quota_on_mount(struct super_block *sb, char *qf_name,
1523 int format_id, int type)
1523{ 1524{
1525 struct dentry *dentry;
1524 int error; 1526 int error;
1525 1527
1528 dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
1529 if (IS_ERR(dentry))
1530 return PTR_ERR(dentry);
1531
1526 error = security_quota_on(dentry); 1532 error = security_quota_on(dentry);
1527 if (error) 1533 if (!error)
1528 return error; 1534 error = vfs_quota_on_inode(dentry->d_inode, type, format_id);
1529 return vfs_quota_on_inode(dentry->d_inode, type, format_id); 1535
1536 dput(dentry);
1537 return error;
1530} 1538}
1531 1539
1532/* Generic routine for getting common part of quota structure */ 1540/* Generic routine for getting common part of quota structure */
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 9900e333655a..6ab1dd0ca904 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -101,57 +101,6 @@
101/* Maximum number of poll wake up nests we are allowing */ 101/* Maximum number of poll wake up nests we are allowing */
102#define EP_MAX_POLLWAKE_NESTS 4 102#define EP_MAX_POLLWAKE_NESTS 4
103 103
104/* Macro to allocate a "struct epitem" from the slab cache */
105#define EPI_MEM_ALLOC() (struct epitem *) kmem_cache_alloc(epi_cache, SLAB_KERNEL)
106
107/* Macro to free a "struct epitem" to the slab cache */
108#define EPI_MEM_FREE(p) kmem_cache_free(epi_cache, p)
109
110/* Macro to allocate a "struct eppoll_entry" from the slab cache */
111#define PWQ_MEM_ALLOC() (struct eppoll_entry *) kmem_cache_alloc(pwq_cache, SLAB_KERNEL)
112
113/* Macro to free a "struct eppoll_entry" to the slab cache */
114#define PWQ_MEM_FREE(p) kmem_cache_free(pwq_cache, p)
115
116/* Fast test to see if the file is an evenpoll file */
117#define IS_FILE_EPOLL(f) ((f)->f_op == &eventpoll_fops)
118
119/* Setup the structure that is used as key for the rb-tree */
120#define EP_SET_FFD(p, f, d) do { (p)->file = (f); (p)->fd = (d); } while (0)
121
122/* Compare rb-tree keys */
123#define EP_CMP_FFD(p1, p2) ((p1)->file > (p2)->file ? +1: \
124 ((p1)->file < (p2)->file ? -1: (p1)->fd - (p2)->fd))
125
126/* Special initialization for the rb-tree node to detect linkage */
127#define EP_RB_INITNODE(n) (n)->rb_parent = (n)
128
129/* Removes a node from the rb-tree and marks it for a fast is-linked check */
130#define EP_RB_ERASE(n, r) do { rb_erase(n, r); (n)->rb_parent = (n); } while (0)
131
132/* Fast check to verify that the item is linked to the main rb-tree */
133#define EP_RB_LINKED(n) ((n)->rb_parent != (n))
134
135/*
136 * Remove the item from the list and perform its initialization.
137 * This is useful for us because we can test if the item is linked
138 * using "EP_IS_LINKED(p)".
139 */
140#define EP_LIST_DEL(p) do { list_del(p); INIT_LIST_HEAD(p); } while (0)
141
142/* Tells us if the item is currently linked */
143#define EP_IS_LINKED(p) (!list_empty(p))
144
145/* Get the "struct epitem" from a wait queue pointer */
146#define EP_ITEM_FROM_WAIT(p) ((struct epitem *) container_of(p, struct eppoll_entry, wait)->base)
147
148/* Get the "struct epitem" from an epoll queue wrapper */
149#define EP_ITEM_FROM_EPQUEUE(p) (container_of(p, struct ep_pqueue, pt)->epi)
150
151/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
152#define EP_OP_HASH_EVENT(op) ((op) != EPOLL_CTL_DEL)
153
154
155struct epoll_filefd { 104struct epoll_filefd {
156 struct file *file; 105 struct file *file;
157 int fd; 106 int fd;
@@ -357,6 +306,82 @@ static struct dentry_operations eventpollfs_dentry_operations = {
357 306
358 307
359 308
309/* Fast test to see if the file is an evenpoll file */
310static inline int is_file_epoll(struct file *f)
311{
312 return f->f_op == &eventpoll_fops;
313}
314
315/* Setup the structure that is used as key for the rb-tree */
316static inline void ep_set_ffd(struct epoll_filefd *ffd,
317 struct file *file, int fd)
318{
319 ffd->file = file;
320 ffd->fd = fd;
321}
322
323/* Compare rb-tree keys */
324static inline int ep_cmp_ffd(struct epoll_filefd *p1,
325 struct epoll_filefd *p2)
326{
327 return (p1->file > p2->file ? +1:
328 (p1->file < p2->file ? -1 : p1->fd - p2->fd));
329}
330
331/* Special initialization for the rb-tree node to detect linkage */
332static inline void ep_rb_initnode(struct rb_node *n)
333{
334 n->rb_parent = n;
335}
336
337/* Removes a node from the rb-tree and marks it for a fast is-linked check */
338static inline void ep_rb_erase(struct rb_node *n, struct rb_root *r)
339{
340 rb_erase(n, r);
341 n->rb_parent = n;
342}
343
344/* Fast check to verify that the item is linked to the main rb-tree */
345static inline int ep_rb_linked(struct rb_node *n)
346{
347 return n->rb_parent != n;
348}
349
350/*
351 * Remove the item from the list and perform its initialization.
352 * This is useful for us because we can test if the item is linked
353 * using "ep_is_linked(p)".
354 */
355static inline void ep_list_del(struct list_head *p)
356{
357 list_del(p);
358 INIT_LIST_HEAD(p);
359}
360
361/* Tells us if the item is currently linked */
362static inline int ep_is_linked(struct list_head *p)
363{
364 return !list_empty(p);
365}
366
367/* Get the "struct epitem" from a wait queue pointer */
368static inline struct epitem * ep_item_from_wait(wait_queue_t *p)
369{
370 return container_of(p, struct eppoll_entry, wait)->base;
371}
372
373/* Get the "struct epitem" from an epoll queue wrapper */
374static inline struct epitem * ep_item_from_epqueue(poll_table *p)
375{
376 return container_of(p, struct ep_pqueue, pt)->epi;
377}
378
379/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
380static inline int ep_op_hash_event(int op)
381{
382 return op != EPOLL_CTL_DEL;
383}
384
360/* Initialize the poll safe wake up structure */ 385/* Initialize the poll safe wake up structure */
361static void ep_poll_safewake_init(struct poll_safewake *psw) 386static void ep_poll_safewake_init(struct poll_safewake *psw)
362{ 387{
@@ -456,7 +481,7 @@ void eventpoll_release_file(struct file *file)
456 epi = list_entry(lsthead->next, struct epitem, fllink); 481 epi = list_entry(lsthead->next, struct epitem, fllink);
457 482
458 ep = epi->ep; 483 ep = epi->ep;
459 EP_LIST_DEL(&epi->fllink); 484 ep_list_del(&epi->fllink);
460 down_write(&ep->sem); 485 down_write(&ep->sem);
461 ep_remove(ep, epi); 486 ep_remove(ep, epi);
462 up_write(&ep->sem); 487 up_write(&ep->sem);
@@ -534,7 +559,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
534 current, epfd, op, fd, event)); 559 current, epfd, op, fd, event));
535 560
536 error = -EFAULT; 561 error = -EFAULT;
537 if (EP_OP_HASH_EVENT(op) && 562 if (ep_op_hash_event(op) &&
538 copy_from_user(&epds, event, sizeof(struct epoll_event))) 563 copy_from_user(&epds, event, sizeof(struct epoll_event)))
539 goto eexit_1; 564 goto eexit_1;
540 565
@@ -560,7 +585,7 @@ sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event)
560 * adding an epoll file descriptor inside itself. 585 * adding an epoll file descriptor inside itself.
561 */ 586 */
562 error = -EINVAL; 587 error = -EINVAL;
563 if (file == tfile || !IS_FILE_EPOLL(file)) 588 if (file == tfile || !is_file_epoll(file))
564 goto eexit_3; 589 goto eexit_3;
565 590
566 /* 591 /*
@@ -656,7 +681,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events,
656 * the user passed to us _is_ an eventpoll file. 681 * the user passed to us _is_ an eventpoll file.
657 */ 682 */
658 error = -EINVAL; 683 error = -EINVAL;
659 if (!IS_FILE_EPOLL(file)) 684 if (!is_file_epoll(file))
660 goto eexit_2; 685 goto eexit_2;
661 686
662 /* 687 /*
@@ -831,11 +856,11 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd)
831 struct epitem *epi, *epir = NULL; 856 struct epitem *epi, *epir = NULL;
832 struct epoll_filefd ffd; 857 struct epoll_filefd ffd;
833 858
834 EP_SET_FFD(&ffd, file, fd); 859 ep_set_ffd(&ffd, file, fd);
835 read_lock_irqsave(&ep->lock, flags); 860 read_lock_irqsave(&ep->lock, flags);
836 for (rbp = ep->rbr.rb_node; rbp; ) { 861 for (rbp = ep->rbr.rb_node; rbp; ) {
837 epi = rb_entry(rbp, struct epitem, rbn); 862 epi = rb_entry(rbp, struct epitem, rbn);
838 kcmp = EP_CMP_FFD(&ffd, &epi->ffd); 863 kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
839 if (kcmp > 0) 864 if (kcmp > 0)
840 rbp = rbp->rb_right; 865 rbp = rbp->rb_right;
841 else if (kcmp < 0) 866 else if (kcmp < 0)
@@ -875,7 +900,7 @@ static void ep_release_epitem(struct epitem *epi)
875{ 900{
876 901
877 if (atomic_dec_and_test(&epi->usecnt)) 902 if (atomic_dec_and_test(&epi->usecnt))
878 EPI_MEM_FREE(epi); 903 kmem_cache_free(epi_cache, epi);
879} 904}
880 905
881 906
@@ -886,10 +911,10 @@ static void ep_release_epitem(struct epitem *epi)
886static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead, 911static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
887 poll_table *pt) 912 poll_table *pt)
888{ 913{
889 struct epitem *epi = EP_ITEM_FROM_EPQUEUE(pt); 914 struct epitem *epi = ep_item_from_epqueue(pt);
890 struct eppoll_entry *pwq; 915 struct eppoll_entry *pwq;
891 916
892 if (epi->nwait >= 0 && (pwq = PWQ_MEM_ALLOC())) { 917 if (epi->nwait >= 0 && (pwq = kmem_cache_alloc(pwq_cache, SLAB_KERNEL))) {
893 init_waitqueue_func_entry(&pwq->wait, ep_poll_callback); 918 init_waitqueue_func_entry(&pwq->wait, ep_poll_callback);
894 pwq->whead = whead; 919 pwq->whead = whead;
895 pwq->base = epi; 920 pwq->base = epi;
@@ -912,7 +937,7 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
912 while (*p) { 937 while (*p) {
913 parent = *p; 938 parent = *p;
914 epic = rb_entry(parent, struct epitem, rbn); 939 epic = rb_entry(parent, struct epitem, rbn);
915 kcmp = EP_CMP_FFD(&epi->ffd, &epic->ffd); 940 kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
916 if (kcmp > 0) 941 if (kcmp > 0)
917 p = &parent->rb_right; 942 p = &parent->rb_right;
918 else 943 else
@@ -932,17 +957,17 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
932 struct ep_pqueue epq; 957 struct ep_pqueue epq;
933 958
934 error = -ENOMEM; 959 error = -ENOMEM;
935 if (!(epi = EPI_MEM_ALLOC())) 960 if (!(epi = kmem_cache_alloc(epi_cache, SLAB_KERNEL)))
936 goto eexit_1; 961 goto eexit_1;
937 962
938 /* Item initialization follow here ... */ 963 /* Item initialization follow here ... */
939 EP_RB_INITNODE(&epi->rbn); 964 ep_rb_initnode(&epi->rbn);
940 INIT_LIST_HEAD(&epi->rdllink); 965 INIT_LIST_HEAD(&epi->rdllink);
941 INIT_LIST_HEAD(&epi->fllink); 966 INIT_LIST_HEAD(&epi->fllink);
942 INIT_LIST_HEAD(&epi->txlink); 967 INIT_LIST_HEAD(&epi->txlink);
943 INIT_LIST_HEAD(&epi->pwqlist); 968 INIT_LIST_HEAD(&epi->pwqlist);
944 epi->ep = ep; 969 epi->ep = ep;
945 EP_SET_FFD(&epi->ffd, tfile, fd); 970 ep_set_ffd(&epi->ffd, tfile, fd);
946 epi->event = *event; 971 epi->event = *event;
947 atomic_set(&epi->usecnt, 1); 972 atomic_set(&epi->usecnt, 1);
948 epi->nwait = 0; 973 epi->nwait = 0;
@@ -978,7 +1003,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
978 ep_rbtree_insert(ep, epi); 1003 ep_rbtree_insert(ep, epi);
979 1004
980 /* If the file is already "ready" we drop it inside the ready list */ 1005 /* If the file is already "ready" we drop it inside the ready list */
981 if ((revents & event->events) && !EP_IS_LINKED(&epi->rdllink)) { 1006 if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
982 list_add_tail(&epi->rdllink, &ep->rdllist); 1007 list_add_tail(&epi->rdllink, &ep->rdllist);
983 1008
984 /* Notify waiting tasks that events are available */ 1009 /* Notify waiting tasks that events are available */
@@ -1007,11 +1032,11 @@ eexit_2:
1007 * allocated wait queue. 1032 * allocated wait queue.
1008 */ 1033 */
1009 write_lock_irqsave(&ep->lock, flags); 1034 write_lock_irqsave(&ep->lock, flags);
1010 if (EP_IS_LINKED(&epi->rdllink)) 1035 if (ep_is_linked(&epi->rdllink))
1011 EP_LIST_DEL(&epi->rdllink); 1036 ep_list_del(&epi->rdllink);
1012 write_unlock_irqrestore(&ep->lock, flags); 1037 write_unlock_irqrestore(&ep->lock, flags);
1013 1038
1014 EPI_MEM_FREE(epi); 1039 kmem_cache_free(epi_cache, epi);
1015eexit_1: 1040eexit_1:
1016 return error; 1041 return error;
1017} 1042}
@@ -1050,14 +1075,14 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
1050 * If the item is not linked to the hash it means that it's on its 1075 * If the item is not linked to the hash it means that it's on its
1051 * way toward the removal. Do nothing in this case. 1076 * way toward the removal. Do nothing in this case.
1052 */ 1077 */
1053 if (EP_RB_LINKED(&epi->rbn)) { 1078 if (ep_rb_linked(&epi->rbn)) {
1054 /* 1079 /*
1055 * If the item is "hot" and it is not registered inside the ready 1080 * If the item is "hot" and it is not registered inside the ready
1056 * list, push it inside. If the item is not "hot" and it is currently 1081 * list, push it inside. If the item is not "hot" and it is currently
1057 * registered inside the ready list, unlink it. 1082 * registered inside the ready list, unlink it.
1058 */ 1083 */
1059 if (revents & event->events) { 1084 if (revents & event->events) {
1060 if (!EP_IS_LINKED(&epi->rdllink)) { 1085 if (!ep_is_linked(&epi->rdllink)) {
1061 list_add_tail(&epi->rdllink, &ep->rdllist); 1086 list_add_tail(&epi->rdllink, &ep->rdllist);
1062 1087
1063 /* Notify waiting tasks that events are available */ 1088 /* Notify waiting tasks that events are available */
@@ -1097,9 +1122,9 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
1097 while (!list_empty(lsthead)) { 1122 while (!list_empty(lsthead)) {
1098 pwq = list_entry(lsthead->next, struct eppoll_entry, llink); 1123 pwq = list_entry(lsthead->next, struct eppoll_entry, llink);
1099 1124
1100 EP_LIST_DEL(&pwq->llink); 1125 ep_list_del(&pwq->llink);
1101 remove_wait_queue(pwq->whead, &pwq->wait); 1126 remove_wait_queue(pwq->whead, &pwq->wait);
1102 PWQ_MEM_FREE(pwq); 1127 kmem_cache_free(pwq_cache, pwq);
1103 } 1128 }
1104 } 1129 }
1105} 1130}
@@ -1118,7 +1143,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
1118 * The check protect us from doing a double unlink ( crash ). 1143 * The check protect us from doing a double unlink ( crash ).
1119 */ 1144 */
1120 error = -ENOENT; 1145 error = -ENOENT;
1121 if (!EP_RB_LINKED(&epi->rbn)) 1146 if (!ep_rb_linked(&epi->rbn))
1122 goto eexit_1; 1147 goto eexit_1;
1123 1148
1124 /* 1149 /*
@@ -1133,14 +1158,14 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
1133 * This operation togheter with the above check closes the door to 1158 * This operation togheter with the above check closes the door to
1134 * double unlinks. 1159 * double unlinks.
1135 */ 1160 */
1136 EP_RB_ERASE(&epi->rbn, &ep->rbr); 1161 ep_rb_erase(&epi->rbn, &ep->rbr);
1137 1162
1138 /* 1163 /*
1139 * If the item we are going to remove is inside the ready file descriptors 1164 * If the item we are going to remove is inside the ready file descriptors
1140 * we want to remove it from this list to avoid stale events. 1165 * we want to remove it from this list to avoid stale events.
1141 */ 1166 */
1142 if (EP_IS_LINKED(&epi->rdllink)) 1167 if (ep_is_linked(&epi->rdllink))
1143 EP_LIST_DEL(&epi->rdllink); 1168 ep_list_del(&epi->rdllink);
1144 1169
1145 error = 0; 1170 error = 0;
1146eexit_1: 1171eexit_1:
@@ -1174,8 +1199,8 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
1174 1199
1175 /* Remove the current item from the list of epoll hooks */ 1200 /* Remove the current item from the list of epoll hooks */
1176 spin_lock(&file->f_ep_lock); 1201 spin_lock(&file->f_ep_lock);
1177 if (EP_IS_LINKED(&epi->fllink)) 1202 if (ep_is_linked(&epi->fllink))
1178 EP_LIST_DEL(&epi->fllink); 1203 ep_list_del(&epi->fllink);
1179 spin_unlock(&file->f_ep_lock); 1204 spin_unlock(&file->f_ep_lock);
1180 1205
1181 /* We need to acquire the write IRQ lock before calling ep_unlink() */ 1206 /* We need to acquire the write IRQ lock before calling ep_unlink() */
@@ -1210,7 +1235,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
1210{ 1235{
1211 int pwake = 0; 1236 int pwake = 0;
1212 unsigned long flags; 1237 unsigned long flags;
1213 struct epitem *epi = EP_ITEM_FROM_WAIT(wait); 1238 struct epitem *epi = ep_item_from_wait(wait);
1214 struct eventpoll *ep = epi->ep; 1239 struct eventpoll *ep = epi->ep;
1215 1240
1216 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n", 1241 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
@@ -1228,7 +1253,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
1228 goto is_disabled; 1253 goto is_disabled;
1229 1254
1230 /* If this file is already in the ready list we exit soon */ 1255 /* If this file is already in the ready list we exit soon */
1231 if (EP_IS_LINKED(&epi->rdllink)) 1256 if (ep_is_linked(&epi->rdllink))
1232 goto is_linked; 1257 goto is_linked;
1233 1258
1234 list_add_tail(&epi->rdllink, &ep->rdllist); 1259 list_add_tail(&epi->rdllink, &ep->rdllist);
@@ -1307,7 +1332,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
1307 lnk = lnk->next; 1332 lnk = lnk->next;
1308 1333
1309 /* If this file is already in the ready list we exit soon */ 1334 /* If this file is already in the ready list we exit soon */
1310 if (!EP_IS_LINKED(&epi->txlink)) { 1335 if (!ep_is_linked(&epi->txlink)) {
1311 /* 1336 /*
1312 * This is initialized in this way so that the default 1337 * This is initialized in this way so that the default
1313 * behaviour of the reinjecting code will be to push back 1338 * behaviour of the reinjecting code will be to push back
@@ -1322,7 +1347,7 @@ static int ep_collect_ready_items(struct eventpoll *ep, struct list_head *txlist
1322 /* 1347 /*
1323 * Unlink the item from the ready list. 1348 * Unlink the item from the ready list.
1324 */ 1349 */
1325 EP_LIST_DEL(&epi->rdllink); 1350 ep_list_del(&epi->rdllink);
1326 } 1351 }
1327 } 1352 }
1328 1353
@@ -1401,7 +1426,7 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1401 epi = list_entry(txlist->next, struct epitem, txlink); 1426 epi = list_entry(txlist->next, struct epitem, txlink);
1402 1427
1403 /* Unlink the current item from the transfer list */ 1428 /* Unlink the current item from the transfer list */
1404 EP_LIST_DEL(&epi->txlink); 1429 ep_list_del(&epi->txlink);
1405 1430
1406 /* 1431 /*
1407 * If the item is no more linked to the interest set, we don't 1432 * If the item is no more linked to the interest set, we don't
@@ -1410,8 +1435,8 @@ static void ep_reinject_items(struct eventpoll *ep, struct list_head *txlist)
1410 * item is set to have an Edge Triggered behaviour, we don't have 1435 * item is set to have an Edge Triggered behaviour, we don't have
1411 * to push it back either. 1436 * to push it back either.
1412 */ 1437 */
1413 if (EP_RB_LINKED(&epi->rbn) && !(epi->event.events & EPOLLET) && 1438 if (ep_rb_linked(&epi->rbn) && !(epi->event.events & EPOLLET) &&
1414 (epi->revents & epi->event.events) && !EP_IS_LINKED(&epi->rdllink)) { 1439 (epi->revents & epi->event.events) && !ep_is_linked(&epi->rdllink)) {
1415 list_add_tail(&epi->rdllink, &ep->rdllist); 1440 list_add_tail(&epi->rdllink, &ep->rdllist);
1416 ricnt++; 1441 ricnt++;
1417 } 1442 }
diff --git a/fs/exec.c b/fs/exec.c
index 3a4b35a14c0d..48871917d363 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -58,6 +58,9 @@
58 58
59int core_uses_pid; 59int core_uses_pid;
60char core_pattern[65] = "core"; 60char core_pattern[65] = "core";
61int suid_dumpable = 0;
62
63EXPORT_SYMBOL(suid_dumpable);
61/* The maximal length of core_pattern is also specified in sysctl.c */ 64/* The maximal length of core_pattern is also specified in sysctl.c */
62 65
63static struct linux_binfmt *formats; 66static struct linux_binfmt *formats;
@@ -864,6 +867,9 @@ int flush_old_exec(struct linux_binprm * bprm)
864 867
865 if (current->euid == current->uid && current->egid == current->gid) 868 if (current->euid == current->uid && current->egid == current->gid)
866 current->mm->dumpable = 1; 869 current->mm->dumpable = 1;
870 else
871 current->mm->dumpable = suid_dumpable;
872
867 name = bprm->filename; 873 name = bprm->filename;
868 874
869 /* Copies the binary name from after last slash */ 875 /* Copies the binary name from after last slash */
@@ -884,7 +890,7 @@ int flush_old_exec(struct linux_binprm * bprm)
884 permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) || 890 permission(bprm->file->f_dentry->d_inode,MAY_READ, NULL) ||
885 (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) { 891 (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
886 suid_keys(current); 892 suid_keys(current);
887 current->mm->dumpable = 0; 893 current->mm->dumpable = suid_dumpable;
888 } 894 }
889 895
890 /* An exec changes our domain. We are no longer part of the thread 896 /* An exec changes our domain. We are no longer part of the thread
@@ -1432,6 +1438,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1432 struct inode * inode; 1438 struct inode * inode;
1433 struct file * file; 1439 struct file * file;
1434 int retval = 0; 1440 int retval = 0;
1441 int fsuid = current->fsuid;
1442 int flag = 0;
1435 1443
1436 binfmt = current->binfmt; 1444 binfmt = current->binfmt;
1437 if (!binfmt || !binfmt->core_dump) 1445 if (!binfmt || !binfmt->core_dump)
@@ -1441,6 +1449,16 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1441 up_write(&mm->mmap_sem); 1449 up_write(&mm->mmap_sem);
1442 goto fail; 1450 goto fail;
1443 } 1451 }
1452
1453 /*
1454 * We cannot trust fsuid as being the "true" uid of the
1455 * process nor do we know its entire history. We only know it
1456 * was tainted so we dump it as root in mode 2.
1457 */
1458 if (mm->dumpable == 2) { /* Setuid core dump mode */
1459 flag = O_EXCL; /* Stop rewrite attacks */
1460 current->fsuid = 0; /* Dump root private */
1461 }
1444 mm->dumpable = 0; 1462 mm->dumpable = 0;
1445 init_completion(&mm->core_done); 1463 init_completion(&mm->core_done);
1446 spin_lock_irq(&current->sighand->siglock); 1464 spin_lock_irq(&current->sighand->siglock);
@@ -1466,7 +1484,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1466 lock_kernel(); 1484 lock_kernel();
1467 format_corename(corename, core_pattern, signr); 1485 format_corename(corename, core_pattern, signr);
1468 unlock_kernel(); 1486 unlock_kernel();
1469 file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE, 0600); 1487 file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600);
1470 if (IS_ERR(file)) 1488 if (IS_ERR(file))
1471 goto fail_unlock; 1489 goto fail_unlock;
1472 inode = file->f_dentry->d_inode; 1490 inode = file->f_dentry->d_inode;
@@ -1491,6 +1509,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1491close_fail: 1509close_fail:
1492 filp_close(file, NULL); 1510 filp_close(file, NULL);
1493fail_unlock: 1511fail_unlock:
1512 current->fsuid = fsuid;
1494 complete_all(&mm->core_done); 1513 complete_all(&mm->core_done);
1495fail: 1514fail:
1496 return retval; 1515 return retval;
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index ee240a14e70f..c5d02da73bc3 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -10,3 +10,4 @@ ext2-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
10ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 10ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
11ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o 11ext2-$(CONFIG_EXT2_FS_POSIX_ACL) += acl.o
12ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o 12ext2-$(CONFIG_EXT2_FS_SECURITY) += xattr_security.o
13ext2-$(CONFIG_EXT2_FS_XIP) += xip.o
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 25f4a64fd6bc..213148c36ebe 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -396,12 +396,12 @@ static size_t
396ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size, 396ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
397 const char *name, size_t name_len) 397 const char *name, size_t name_len)
398{ 398{
399 const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); 399 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
400 400
401 if (!test_opt(inode->i_sb, POSIX_ACL)) 401 if (!test_opt(inode->i_sb, POSIX_ACL))
402 return 0; 402 return 0;
403 if (list && size <= list_size) 403 if (list && size <= list_size)
404 memcpy(list, XATTR_NAME_ACL_ACCESS, size); 404 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
405 return size; 405 return size;
406} 406}
407 407
@@ -409,12 +409,12 @@ static size_t
409ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size, 409ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
410 const char *name, size_t name_len) 410 const char *name, size_t name_len)
411{ 411{
412 const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); 412 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
413 413
414 if (!test_opt(inode->i_sb, POSIX_ACL)) 414 if (!test_opt(inode->i_sb, POSIX_ACL))
415 return 0; 415 return 0;
416 if (list && size <= list_size) 416 if (list && size <= list_size)
417 memcpy(list, XATTR_NAME_ACL_DEFAULT, size); 417 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
418 return size; 418 return size;
419} 419}
420 420
@@ -506,14 +506,14 @@ ext2_xattr_set_acl_default(struct inode *inode, const char *name,
506} 506}
507 507
508struct xattr_handler ext2_xattr_acl_access_handler = { 508struct xattr_handler ext2_xattr_acl_access_handler = {
509 .prefix = XATTR_NAME_ACL_ACCESS, 509 .prefix = POSIX_ACL_XATTR_ACCESS,
510 .list = ext2_xattr_list_acl_access, 510 .list = ext2_xattr_list_acl_access,
511 .get = ext2_xattr_get_acl_access, 511 .get = ext2_xattr_get_acl_access,
512 .set = ext2_xattr_set_acl_access, 512 .set = ext2_xattr_set_acl_access,
513}; 513};
514 514
515struct xattr_handler ext2_xattr_acl_default_handler = { 515struct xattr_handler ext2_xattr_acl_default_handler = {
516 .prefix = XATTR_NAME_ACL_DEFAULT, 516 .prefix = POSIX_ACL_XATTR_DEFAULT,
517 .list = ext2_xattr_list_acl_default, 517 .list = ext2_xattr_list_acl_default,
518 .get = ext2_xattr_get_acl_default, 518 .get = ext2_xattr_get_acl_default,
519 .set = ext2_xattr_set_acl_default, 519 .set = ext2_xattr_set_acl_default,
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index fed96ae81a7d..0bde85bafe38 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -4,7 +4,7 @@
4 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> 4 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
5*/ 5*/
6 6
7#include <linux/xattr_acl.h> 7#include <linux/posix_acl_xattr.h>
8 8
9#define EXT2_ACL_VERSION 0x0001 9#define EXT2_ACL_VERSION 0x0001
10 10
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 8f0fd726c3f1..eed521d22cf0 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -147,9 +147,11 @@ extern struct file_operations ext2_dir_operations;
147/* file.c */ 147/* file.c */
148extern struct inode_operations ext2_file_inode_operations; 148extern struct inode_operations ext2_file_inode_operations;
149extern struct file_operations ext2_file_operations; 149extern struct file_operations ext2_file_operations;
150extern struct file_operations ext2_xip_file_operations;
150 151
151/* inode.c */ 152/* inode.c */
152extern struct address_space_operations ext2_aops; 153extern struct address_space_operations ext2_aops;
154extern struct address_space_operations ext2_aops_xip;
153extern struct address_space_operations ext2_nobh_aops; 155extern struct address_space_operations ext2_nobh_aops;
154 156
155/* namei.c */ 157/* namei.c */
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index f5e86141ec54..a484412fc782 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -55,6 +55,20 @@ struct file_operations ext2_file_operations = {
55 .sendfile = generic_file_sendfile, 55 .sendfile = generic_file_sendfile,
56}; 56};
57 57
58#ifdef CONFIG_EXT2_FS_XIP
59struct file_operations ext2_xip_file_operations = {
60 .llseek = generic_file_llseek,
61 .read = xip_file_read,
62 .write = xip_file_write,
63 .ioctl = ext2_ioctl,
64 .mmap = xip_file_mmap,
65 .open = generic_file_open,
66 .release = ext2_release_file,
67 .fsync = ext2_sync_file,
68 .sendfile = xip_file_sendfile,
69};
70#endif
71
58struct inode_operations ext2_file_inode_operations = { 72struct inode_operations ext2_file_inode_operations = {
59 .truncate = ext2_truncate, 73 .truncate = ext2_truncate,
60#ifdef CONFIG_EXT2_FS_XATTR 74#ifdef CONFIG_EXT2_FS_XATTR
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a50d9db4b6e4..53dceb0c6593 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -33,6 +33,7 @@
33#include <linux/mpage.h> 33#include <linux/mpage.h>
34#include "ext2.h" 34#include "ext2.h"
35#include "acl.h" 35#include "acl.h"
36#include "xip.h"
36 37
37MODULE_AUTHOR("Remy Card and others"); 38MODULE_AUTHOR("Remy Card and others");
38MODULE_DESCRIPTION("Second Extended Filesystem"); 39MODULE_DESCRIPTION("Second Extended Filesystem");
@@ -594,6 +595,16 @@ out:
594 if (err) 595 if (err)
595 goto cleanup; 596 goto cleanup;
596 597
598 if (ext2_use_xip(inode->i_sb)) {
599 /*
600 * we need to clear the block
601 */
602 err = ext2_clear_xip_target (inode,
603 le32_to_cpu(chain[depth-1].key));
604 if (err)
605 goto cleanup;
606 }
607
597 if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0) 608 if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0)
598 goto changed; 609 goto changed;
599 610
@@ -691,6 +702,11 @@ struct address_space_operations ext2_aops = {
691 .writepages = ext2_writepages, 702 .writepages = ext2_writepages,
692}; 703};
693 704
705struct address_space_operations ext2_aops_xip = {
706 .bmap = ext2_bmap,
707 .get_xip_page = ext2_get_xip_page,
708};
709
694struct address_space_operations ext2_nobh_aops = { 710struct address_space_operations ext2_nobh_aops = {
695 .readpage = ext2_readpage, 711 .readpage = ext2_readpage,
696 .readpages = ext2_readpages, 712 .readpages = ext2_readpages,
@@ -910,7 +926,9 @@ void ext2_truncate (struct inode * inode)
910 iblock = (inode->i_size + blocksize-1) 926 iblock = (inode->i_size + blocksize-1)
911 >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); 927 >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
912 928
913 if (test_opt(inode->i_sb, NOBH)) 929 if (mapping_is_xip(inode->i_mapping))
930 xip_truncate_page(inode->i_mapping, inode->i_size);
931 else if (test_opt(inode->i_sb, NOBH))
914 nobh_truncate_page(inode->i_mapping, inode->i_size); 932 nobh_truncate_page(inode->i_mapping, inode->i_size);
915 else 933 else
916 block_truncate_page(inode->i_mapping, 934 block_truncate_page(inode->i_mapping,
@@ -1110,11 +1128,16 @@ void ext2_read_inode (struct inode * inode)
1110 1128
1111 if (S_ISREG(inode->i_mode)) { 1129 if (S_ISREG(inode->i_mode)) {
1112 inode->i_op = &ext2_file_inode_operations; 1130 inode->i_op = &ext2_file_inode_operations;
1113 inode->i_fop = &ext2_file_operations; 1131 if (ext2_use_xip(inode->i_sb)) {
1114 if (test_opt(inode->i_sb, NOBH)) 1132 inode->i_mapping->a_ops = &ext2_aops_xip;
1133 inode->i_fop = &ext2_xip_file_operations;
1134 } else if (test_opt(inode->i_sb, NOBH)) {
1115 inode->i_mapping->a_ops = &ext2_nobh_aops; 1135 inode->i_mapping->a_ops = &ext2_nobh_aops;
1116 else 1136 inode->i_fop = &ext2_file_operations;
1137 } else {
1117 inode->i_mapping->a_ops = &ext2_aops; 1138 inode->i_mapping->a_ops = &ext2_aops;
1139 inode->i_fop = &ext2_file_operations;
1140 }
1118 } else if (S_ISDIR(inode->i_mode)) { 1141 } else if (S_ISDIR(inode->i_mode)) {
1119 inode->i_op = &ext2_dir_inode_operations; 1142 inode->i_op = &ext2_dir_inode_operations;
1120 inode->i_fop = &ext2_dir_operations; 1143 inode->i_fop = &ext2_dir_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 3176b3d3ffa8..c5513953c825 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -34,6 +34,7 @@
34#include "ext2.h" 34#include "ext2.h"
35#include "xattr.h" 35#include "xattr.h"
36#include "acl.h" 36#include "acl.h"
37#include "xip.h"
37 38
38/* 39/*
39 * Couple of helper functions - make the code slightly cleaner. 40 * Couple of helper functions - make the code slightly cleaner.
@@ -127,11 +128,16 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, int mode, st
127 int err = PTR_ERR(inode); 128 int err = PTR_ERR(inode);
128 if (!IS_ERR(inode)) { 129 if (!IS_ERR(inode)) {
129 inode->i_op = &ext2_file_inode_operations; 130 inode->i_op = &ext2_file_inode_operations;
130 inode->i_fop = &ext2_file_operations; 131 if (ext2_use_xip(inode->i_sb)) {
131 if (test_opt(inode->i_sb, NOBH)) 132 inode->i_mapping->a_ops = &ext2_aops_xip;
133 inode->i_fop = &ext2_xip_file_operations;
134 } else if (test_opt(inode->i_sb, NOBH)) {
132 inode->i_mapping->a_ops = &ext2_nobh_aops; 135 inode->i_mapping->a_ops = &ext2_nobh_aops;
133 else 136 inode->i_fop = &ext2_file_operations;
137 } else {
134 inode->i_mapping->a_ops = &ext2_aops; 138 inode->i_mapping->a_ops = &ext2_aops;
139 inode->i_fop = &ext2_file_operations;
140 }
135 mark_inode_dirty(inode); 141 mark_inode_dirty(inode);
136 err = ext2_add_nondir(dentry, inode); 142 err = ext2_add_nondir(dentry, inode);
137 } 143 }
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 661c3d98d946..876e391f2871 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
31#include "ext2.h" 31#include "ext2.h"
32#include "xattr.h" 32#include "xattr.h"
33#include "acl.h" 33#include "acl.h"
34#include "xip.h"
34 35
35static void ext2_sync_super(struct super_block *sb, 36static void ext2_sync_super(struct super_block *sb,
36 struct ext2_super_block *es); 37 struct ext2_super_block *es);
@@ -257,7 +258,7 @@ enum {
257 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, 258 Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
258 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, 259 Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
259 Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, 260 Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh,
260 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, 261 Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip,
261 Opt_ignore, Opt_err, 262 Opt_ignore, Opt_err,
262}; 263};
263 264
@@ -286,6 +287,7 @@ static match_table_t tokens = {
286 {Opt_nouser_xattr, "nouser_xattr"}, 287 {Opt_nouser_xattr, "nouser_xattr"},
287 {Opt_acl, "acl"}, 288 {Opt_acl, "acl"},
288 {Opt_noacl, "noacl"}, 289 {Opt_noacl, "noacl"},
290 {Opt_xip, "xip"},
289 {Opt_ignore, "grpquota"}, 291 {Opt_ignore, "grpquota"},
290 {Opt_ignore, "noquota"}, 292 {Opt_ignore, "noquota"},
291 {Opt_ignore, "quota"}, 293 {Opt_ignore, "quota"},
@@ -397,6 +399,13 @@ static int parse_options (char * options,
397 printk("EXT2 (no)acl options not supported\n"); 399 printk("EXT2 (no)acl options not supported\n");
398 break; 400 break;
399#endif 401#endif
402 case Opt_xip:
403#ifdef CONFIG_EXT2_FS_XIP
404 set_opt (sbi->s_mount_opt, XIP);
405#else
406 printk("EXT2 xip option not supported\n");
407#endif
408 break;
400 case Opt_ignore: 409 case Opt_ignore:
401 break; 410 break;
402 default: 411 default:
@@ -640,6 +649,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
640 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? 649 ((EXT2_SB(sb)->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ?
641 MS_POSIXACL : 0); 650 MS_POSIXACL : 0);
642 651
652 ext2_xip_verify_sb(sb); /* see if bdev supports xip, unset
653 EXT2_MOUNT_XIP if not */
654
643 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV && 655 if (le32_to_cpu(es->s_rev_level) == EXT2_GOOD_OLD_REV &&
644 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) || 656 (EXT2_HAS_COMPAT_FEATURE(sb, ~0U) ||
645 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) || 657 EXT2_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
@@ -668,6 +680,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
668 680
669 blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); 681 blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
670 682
683 if ((ext2_use_xip(sb)) && ((blocksize != PAGE_SIZE) ||
684 (sb->s_blocksize != blocksize))) {
685 if (!silent)
686 printk("XIP: Unsupported blocksize\n");
687 goto failed_mount;
688 }
689
671 /* If the blocksize doesn't match, re-read the thing.. */ 690 /* If the blocksize doesn't match, re-read the thing.. */
672 if (sb->s_blocksize != blocksize) { 691 if (sb->s_blocksize != blocksize) {
673 brelse(bh); 692 brelse(bh);
@@ -916,6 +935,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
916{ 935{
917 struct ext2_sb_info * sbi = EXT2_SB(sb); 936 struct ext2_sb_info * sbi = EXT2_SB(sb);
918 struct ext2_super_block * es; 937 struct ext2_super_block * es;
938 unsigned long old_mount_opt = sbi->s_mount_opt;
919 939
920 /* 940 /*
921 * Allow the "check" option to be passed as a remount option. 941 * Allow the "check" option to be passed as a remount option.
@@ -927,6 +947,11 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
927 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); 947 ((sbi->s_mount_opt & EXT2_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
928 948
929 es = sbi->s_es; 949 es = sbi->s_es;
950 if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
951 (old_mount_opt & EXT2_MOUNT_XIP)) &&
952 invalidate_inodes(sb))
953 ext2_warning(sb, __FUNCTION__, "busy inodes while remounting "\
954 "xip remain in cache (no functional problem)");
930 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) 955 if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
931 return 0; 956 return 0;
932 if (*flags & MS_RDONLY) { 957 if (*flags & MS_RDONLY) {
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
new file mode 100644
index 000000000000..d44431d1a338
--- /dev/null
+++ b/fs/ext2/xip.c
@@ -0,0 +1,80 @@
1/*
2 * linux/fs/ext2/xip.c
3 *
4 * Copyright (C) 2005 IBM Corporation
5 * Author: Carsten Otte (cotte@de.ibm.com)
6 */
7
8#include <linux/mm.h>
9#include <linux/fs.h>
10#include <linux/genhd.h>
11#include <linux/buffer_head.h>
12#include <linux/ext2_fs_sb.h>
13#include <linux/ext2_fs.h>
14#include "ext2.h"
15#include "xip.h"
16
17static inline int
18__inode_direct_access(struct inode *inode, sector_t sector, unsigned long *data) {
19 BUG_ON(!inode->i_sb->s_bdev->bd_disk->fops->direct_access);
20 return inode->i_sb->s_bdev->bd_disk->fops
21 ->direct_access(inode->i_sb->s_bdev,sector,data);
22}
23
24int
25ext2_clear_xip_target(struct inode *inode, int block) {
26 sector_t sector = block*(PAGE_SIZE/512);
27 unsigned long data;
28 int rc;
29
30 rc = __inode_direct_access(inode, sector, &data);
31 if (rc)
32 return rc;
33 clear_page((void*)data);
34 return 0;
35}
36
37void ext2_xip_verify_sb(struct super_block *sb)
38{
39 struct ext2_sb_info *sbi = EXT2_SB(sb);
40
41 if ((sbi->s_mount_opt & EXT2_MOUNT_XIP)) {
42 if ((sb->s_bdev == NULL) ||
43 sb->s_bdev->bd_disk == NULL ||
44 sb->s_bdev->bd_disk->fops == NULL ||
45 sb->s_bdev->bd_disk->fops->direct_access == NULL) {
46 sbi->s_mount_opt &= (~EXT2_MOUNT_XIP);
47 ext2_warning(sb, __FUNCTION__,
48 "ignoring xip option - not supported by bdev");
49 }
50 }
51}
52
53struct page*
54ext2_get_xip_page(struct address_space *mapping, sector_t blockno,
55 int create)
56{
57 int rc;
58 unsigned long data;
59 struct buffer_head tmp;
60
61 tmp.b_state = 0;
62 tmp.b_blocknr = 0;
63 rc = ext2_get_block(mapping->host, blockno/(PAGE_SIZE/512) , &tmp,
64 create);
65 if (rc)
66 return ERR_PTR(rc);
67 if (tmp.b_blocknr == 0) {
68 /* SPARSE block */
69 BUG_ON(create);
70 return ERR_PTR(-ENODATA);
71 }
72
73 rc = __inode_direct_access
74 (mapping->host,tmp.b_blocknr*(PAGE_SIZE/512) ,&data);
75 if (rc)
76 return ERR_PTR(rc);
77
78 SetPageUptodate(virt_to_page(data));
79 return virt_to_page(data);
80}
diff --git a/fs/ext2/xip.h b/fs/ext2/xip.h
new file mode 100644
index 000000000000..aa85331d6c56
--- /dev/null
+++ b/fs/ext2/xip.h
@@ -0,0 +1,25 @@
1/*
2 * linux/fs/ext2/xip.h
3 *
4 * Copyright (C) 2005 IBM Corporation
5 * Author: Carsten Otte (cotte@de.ibm.com)
6 */
7
8#ifdef CONFIG_EXT2_FS_XIP
9extern void ext2_xip_verify_sb (struct super_block *);
10extern int ext2_clear_xip_target (struct inode *, int);
11
12static inline int ext2_use_xip (struct super_block *sb)
13{
14 struct ext2_sb_info *sbi = EXT2_SB(sb);
15 return (sbi->s_mount_opt & EXT2_MOUNT_XIP);
16}
17struct page* ext2_get_xip_page (struct address_space *, sector_t, int);
18#define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page)
19#else
20#define mapping_is_xip(map) 0
21#define ext2_xip_verify_sb(sb) do { } while (0)
22#define ext2_use_xip(sb) 0
23#define ext2_clear_xip_target(inode, chain) 0
24#define ext2_get_xip_page NULL
25#endif
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 638c13a26c03..3ac38266fc9e 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -393,7 +393,8 @@ ext3_acl_chmod(struct inode *inode)
393 int retries = 0; 393 int retries = 0;
394 394
395 retry: 395 retry:
396 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS); 396 handle = ext3_journal_start(inode,
397 EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
397 if (IS_ERR(handle)) { 398 if (IS_ERR(handle)) {
398 error = PTR_ERR(handle); 399 error = PTR_ERR(handle);
399 ext3_std_error(inode->i_sb, error); 400 ext3_std_error(inode->i_sb, error);
@@ -417,12 +418,12 @@ static size_t
417ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, 418ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
418 const char *name, size_t name_len) 419 const char *name, size_t name_len)
419{ 420{
420 const size_t size = sizeof(XATTR_NAME_ACL_ACCESS); 421 const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
421 422
422 if (!test_opt(inode->i_sb, POSIX_ACL)) 423 if (!test_opt(inode->i_sb, POSIX_ACL))
423 return 0; 424 return 0;
424 if (list && size <= list_len) 425 if (list && size <= list_len)
425 memcpy(list, XATTR_NAME_ACL_ACCESS, size); 426 memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
426 return size; 427 return size;
427} 428}
428 429
@@ -430,12 +431,12 @@ static size_t
430ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, 431ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
431 const char *name, size_t name_len) 432 const char *name, size_t name_len)
432{ 433{
433 const size_t size = sizeof(XATTR_NAME_ACL_DEFAULT); 434 const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
434 435
435 if (!test_opt(inode->i_sb, POSIX_ACL)) 436 if (!test_opt(inode->i_sb, POSIX_ACL))
436 return 0; 437 return 0;
437 if (list && size <= list_len) 438 if (list && size <= list_len)
438 memcpy(list, XATTR_NAME_ACL_DEFAULT, size); 439 memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
439 return size; 440 return size;
440} 441}
441 442
@@ -503,7 +504,7 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
503 acl = NULL; 504 acl = NULL;
504 505
505retry: 506retry:
506 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS); 507 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
507 if (IS_ERR(handle)) 508 if (IS_ERR(handle))
508 return PTR_ERR(handle); 509 return PTR_ERR(handle);
509 error = ext3_set_acl(handle, inode, type, acl); 510 error = ext3_set_acl(handle, inode, type, acl);
@@ -535,14 +536,14 @@ ext3_xattr_set_acl_default(struct inode *inode, const char *name,
535} 536}
536 537
537struct xattr_handler ext3_xattr_acl_access_handler = { 538struct xattr_handler ext3_xattr_acl_access_handler = {
538 .prefix = XATTR_NAME_ACL_ACCESS, 539 .prefix = POSIX_ACL_XATTR_ACCESS,
539 .list = ext3_xattr_list_acl_access, 540 .list = ext3_xattr_list_acl_access,
540 .get = ext3_xattr_get_acl_access, 541 .get = ext3_xattr_get_acl_access,
541 .set = ext3_xattr_set_acl_access, 542 .set = ext3_xattr_set_acl_access,
542}; 543};
543 544
544struct xattr_handler ext3_xattr_acl_default_handler = { 545struct xattr_handler ext3_xattr_acl_default_handler = {
545 .prefix = XATTR_NAME_ACL_DEFAULT, 546 .prefix = POSIX_ACL_XATTR_DEFAULT,
546 .list = ext3_xattr_list_acl_default, 547 .list = ext3_xattr_list_acl_default,
547 .get = ext3_xattr_get_acl_default, 548 .get = ext3_xattr_get_acl_default,
548 .set = ext3_xattr_set_acl_default, 549 .set = ext3_xattr_set_acl_default,
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 98af0c0d0ba9..92d50b53a933 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -4,7 +4,7 @@
4 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> 4 (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
5*/ 5*/
6 6
7#include <linux/xattr_acl.h> 7#include <linux/posix_acl_xattr.h>
8 8
9#define EXT3_ACL_VERSION 0x0001 9#define EXT3_ACL_VERSION 0x0001
10 10
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0d5fa73b18dc..0b2db4f618cb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -128,7 +128,7 @@ static unsigned long blocks_for_truncate(struct inode *inode)
128 if (needed > EXT3_MAX_TRANS_DATA) 128 if (needed > EXT3_MAX_TRANS_DATA)
129 needed = EXT3_MAX_TRANS_DATA; 129 needed = EXT3_MAX_TRANS_DATA;
130 130
131 return EXT3_DATA_TRANS_BLOCKS + needed; 131 return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
132} 132}
133 133
134/* 134/*
@@ -2763,7 +2763,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
2763 2763
2764 /* (user+group)*(old+new) structure, inode write (sb, 2764 /* (user+group)*(old+new) structure, inode write (sb,
2765 * inode block, ? - but truncate inode update has it) */ 2765 * inode block, ? - but truncate inode update has it) */
2766 handle = ext3_journal_start(inode, 4*EXT3_QUOTA_INIT_BLOCKS+3); 2766 handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
2767 EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
2767 if (IS_ERR(handle)) { 2768 if (IS_ERR(handle)) {
2768 error = PTR_ERR(handle); 2769 error = PTR_ERR(handle);
2769 goto err_out; 2770 goto err_out;
@@ -2861,7 +2862,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
2861#ifdef CONFIG_QUOTA 2862#ifdef CONFIG_QUOTA
2862 /* We know that structure was already allocated during DQUOT_INIT so 2863 /* We know that structure was already allocated during DQUOT_INIT so
2863 * we will be updating only the data blocks + inodes */ 2864 * we will be updating only the data blocks + inodes */
2864 ret += 2*EXT3_QUOTA_TRANS_BLOCKS; 2865 ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
2865#endif 2866#endif
2866 2867
2867 return ret; 2868 return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 79742d824a0a..50378d8ff84b 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -932,8 +932,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
932 struct inode *dir = dentry->d_parent->d_inode; 932 struct inode *dir = dentry->d_parent->d_inode;
933 933
934 sb = dir->i_sb; 934 sb = dir->i_sb;
935 if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) 935 /* NFS may look up ".." - look at dx_root directory block */
936 return NULL; 936 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
937 if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
938 return NULL;
939 } else {
940 frame = frames;
941 frame->bh = NULL; /* for dx_release() */
942 frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
943 dx_set_block(frame->at, 0); /* dx_root block is 0 */
944 }
937 hash = hinfo.hash; 945 hash = hinfo.hash;
938 do { 946 do {
939 block = dx_get_block(frame->at); 947 block = dx_get_block(frame->at);
@@ -1637,9 +1645,9 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1637 int err, retries = 0; 1645 int err, retries = 0;
1638 1646
1639retry: 1647retry:
1640 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 1648 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1641 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1649 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1642 2*EXT3_QUOTA_INIT_BLOCKS); 1650 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
1643 if (IS_ERR(handle)) 1651 if (IS_ERR(handle))
1644 return PTR_ERR(handle); 1652 return PTR_ERR(handle);
1645 1653
@@ -1671,9 +1679,9 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
1671 return -EINVAL; 1679 return -EINVAL;
1672 1680
1673retry: 1681retry:
1674 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 1682 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1675 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1683 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1676 2*EXT3_QUOTA_INIT_BLOCKS); 1684 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
1677 if (IS_ERR(handle)) 1685 if (IS_ERR(handle))
1678 return PTR_ERR(handle); 1686 return PTR_ERR(handle);
1679 1687
@@ -1707,9 +1715,9 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
1707 return -EMLINK; 1715 return -EMLINK;
1708 1716
1709retry: 1717retry:
1710 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 1718 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
1711 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1719 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1712 2*EXT3_QUOTA_INIT_BLOCKS); 1720 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
1713 if (IS_ERR(handle)) 1721 if (IS_ERR(handle))
1714 return PTR_ERR(handle); 1722 return PTR_ERR(handle);
1715 1723
@@ -1998,7 +2006,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
1998 /* Initialize quotas before so that eventual writes go in 2006 /* Initialize quotas before so that eventual writes go in
1999 * separate transaction */ 2007 * separate transaction */
2000 DQUOT_INIT(dentry->d_inode); 2008 DQUOT_INIT(dentry->d_inode);
2001 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); 2009 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
2002 if (IS_ERR(handle)) 2010 if (IS_ERR(handle))
2003 return PTR_ERR(handle); 2011 return PTR_ERR(handle);
2004 2012
@@ -2057,7 +2065,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2057 /* Initialize quotas before so that eventual writes go 2065 /* Initialize quotas before so that eventual writes go
2058 * in separate transaction */ 2066 * in separate transaction */
2059 DQUOT_INIT(dentry->d_inode); 2067 DQUOT_INIT(dentry->d_inode);
2060 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS); 2068 handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb));
2061 if (IS_ERR(handle)) 2069 if (IS_ERR(handle))
2062 return PTR_ERR(handle); 2070 return PTR_ERR(handle);
2063 2071
@@ -2112,9 +2120,9 @@ static int ext3_symlink (struct inode * dir,
2112 return -ENAMETOOLONG; 2120 return -ENAMETOOLONG;
2113 2121
2114retry: 2122retry:
2115 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 2123 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2116 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2124 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2117 2*EXT3_QUOTA_INIT_BLOCKS); 2125 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
2118 if (IS_ERR(handle)) 2126 if (IS_ERR(handle))
2119 return PTR_ERR(handle); 2127 return PTR_ERR(handle);
2120 2128
@@ -2166,7 +2174,7 @@ static int ext3_link (struct dentry * old_dentry,
2166 return -EMLINK; 2174 return -EMLINK;
2167 2175
2168retry: 2176retry:
2169 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS + 2177 handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
2170 EXT3_INDEX_EXTRA_TRANS_BLOCKS); 2178 EXT3_INDEX_EXTRA_TRANS_BLOCKS);
2171 if (IS_ERR(handle)) 2179 if (IS_ERR(handle))
2172 return PTR_ERR(handle); 2180 return PTR_ERR(handle);
@@ -2208,7 +2216,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2208 * in separate transaction */ 2216 * in separate transaction */
2209 if (new_dentry->d_inode) 2217 if (new_dentry->d_inode)
2210 DQUOT_INIT(new_dentry->d_inode); 2218 DQUOT_INIT(new_dentry->d_inode);
2211 handle = ext3_journal_start(old_dir, 2 * EXT3_DATA_TRANS_BLOCKS + 2219 handle = ext3_journal_start(old_dir, 2 *
2220 EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) +
2212 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); 2221 EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2);
2213 if (IS_ERR(handle)) 2222 if (IS_ERR(handle))
2214 return PTR_ERR(handle); 2223 return PTR_ERR(handle);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 981ccb233ef5..b4b3e8a39131 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -589,7 +589,7 @@ enum {
589 Opt_commit, Opt_journal_update, Opt_journal_inum, 589 Opt_commit, Opt_journal_update, Opt_journal_inum,
590 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 590 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
591 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 591 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
592 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, 592 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
593 Opt_ignore, Opt_barrier, Opt_err, Opt_resize, 593 Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
594}; 594};
595 595
@@ -634,10 +634,10 @@ static match_table_t tokens = {
634 {Opt_grpjquota, "grpjquota=%s"}, 634 {Opt_grpjquota, "grpjquota=%s"},
635 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 635 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
636 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 636 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
637 {Opt_ignore, "grpquota"}, 637 {Opt_quota, "grpquota"},
638 {Opt_ignore, "noquota"}, 638 {Opt_noquota, "noquota"},
639 {Opt_ignore, "quota"}, 639 {Opt_quota, "quota"},
640 {Opt_ignore, "usrquota"}, 640 {Opt_quota, "usrquota"},
641 {Opt_barrier, "barrier=%u"}, 641 {Opt_barrier, "barrier=%u"},
642 {Opt_err, NULL}, 642 {Opt_err, NULL},
643 {Opt_resize, "resize"}, 643 {Opt_resize, "resize"},
@@ -876,6 +876,7 @@ set_qf_name:
876 sbi->s_qf_names[qtype] = NULL; 876 sbi->s_qf_names[qtype] = NULL;
877 return 0; 877 return 0;
878 } 878 }
879 set_opt(sbi->s_mount_opt, QUOTA);
879 break; 880 break;
880 case Opt_offusrjquota: 881 case Opt_offusrjquota:
881 qtype = USRQUOTA; 882 qtype = USRQUOTA;
@@ -898,6 +899,17 @@ clear_qf_name:
898 case Opt_jqfmt_vfsv0: 899 case Opt_jqfmt_vfsv0:
899 sbi->s_jquota_fmt = QFMT_VFS_V0; 900 sbi->s_jquota_fmt = QFMT_VFS_V0;
900 break; 901 break;
902 case Opt_quota:
903 set_opt(sbi->s_mount_opt, QUOTA);
904 break;
905 case Opt_noquota:
906 if (sb_any_quota_enabled(sb)) {
907 printk(KERN_ERR "EXT3-fs: Cannot change quota "
908 "options when quota turned on.\n");
909 return 0;
910 }
911 clear_opt(sbi->s_mount_opt, QUOTA);
912 break;
901#else 913#else
902 case Opt_usrjquota: 914 case Opt_usrjquota:
903 case Opt_grpjquota: 915 case Opt_grpjquota:
@@ -909,6 +921,9 @@ clear_qf_name:
909 "EXT3-fs: journalled quota options not " 921 "EXT3-fs: journalled quota options not "
910 "supported.\n"); 922 "supported.\n");
911 break; 923 break;
924 case Opt_quota:
925 case Opt_noquota:
926 break;
912#endif 927#endif
913 case Opt_abort: 928 case Opt_abort:
914 set_opt(sbi->s_mount_opt, ABORT); 929 set_opt(sbi->s_mount_opt, ABORT);
@@ -2238,7 +2253,7 @@ static int ext3_dquot_initialize(struct inode *inode, int type)
2238 int ret, err; 2253 int ret, err;
2239 2254
2240 /* We may create quota structure so we need to reserve enough blocks */ 2255 /* We may create quota structure so we need to reserve enough blocks */
2241 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS); 2256 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb));
2242 if (IS_ERR(handle)) 2257 if (IS_ERR(handle))
2243 return PTR_ERR(handle); 2258 return PTR_ERR(handle);
2244 ret = dquot_initialize(inode, type); 2259 ret = dquot_initialize(inode, type);
@@ -2254,7 +2269,7 @@ static int ext3_dquot_drop(struct inode *inode)
2254 int ret, err; 2269 int ret, err;
2255 2270
2256 /* We may delete quota structure so we need to reserve enough blocks */ 2271 /* We may delete quota structure so we need to reserve enough blocks */
2257 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS); 2272 handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb));
2258 if (IS_ERR(handle)) 2273 if (IS_ERR(handle))
2259 return PTR_ERR(handle); 2274 return PTR_ERR(handle);
2260 ret = dquot_drop(inode); 2275 ret = dquot_drop(inode);
@@ -2272,7 +2287,7 @@ static int ext3_write_dquot(struct dquot *dquot)
2272 2287
2273 inode = dquot_to_inode(dquot); 2288 inode = dquot_to_inode(dquot);
2274 handle = ext3_journal_start(inode, 2289 handle = ext3_journal_start(inode,
2275 EXT3_QUOTA_TRANS_BLOCKS); 2290 EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
2276 if (IS_ERR(handle)) 2291 if (IS_ERR(handle))
2277 return PTR_ERR(handle); 2292 return PTR_ERR(handle);
2278 ret = dquot_commit(dquot); 2293 ret = dquot_commit(dquot);
@@ -2288,7 +2303,7 @@ static int ext3_acquire_dquot(struct dquot *dquot)
2288 handle_t *handle; 2303 handle_t *handle;
2289 2304
2290 handle = ext3_journal_start(dquot_to_inode(dquot), 2305 handle = ext3_journal_start(dquot_to_inode(dquot),
2291 EXT3_QUOTA_INIT_BLOCKS); 2306 EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
2292 if (IS_ERR(handle)) 2307 if (IS_ERR(handle))
2293 return PTR_ERR(handle); 2308 return PTR_ERR(handle);
2294 ret = dquot_acquire(dquot); 2309 ret = dquot_acquire(dquot);
@@ -2304,7 +2319,7 @@ static int ext3_release_dquot(struct dquot *dquot)
2304 handle_t *handle; 2319 handle_t *handle;
2305 2320
2306 handle = ext3_journal_start(dquot_to_inode(dquot), 2321 handle = ext3_journal_start(dquot_to_inode(dquot),
2307 EXT3_QUOTA_INIT_BLOCKS); 2322 EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
2308 if (IS_ERR(handle)) 2323 if (IS_ERR(handle))
2309 return PTR_ERR(handle); 2324 return PTR_ERR(handle);
2310 ret = dquot_release(dquot); 2325 ret = dquot_release(dquot);
@@ -2348,22 +2363,8 @@ static int ext3_write_info(struct super_block *sb, int type)
2348 */ 2363 */
2349static int ext3_quota_on_mount(struct super_block *sb, int type) 2364static int ext3_quota_on_mount(struct super_block *sb, int type)
2350{ 2365{
2351 int err; 2366 return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
2352 struct dentry *dentry; 2367 EXT3_SB(sb)->s_jquota_fmt, type);
2353 struct qstr name = { .name = EXT3_SB(sb)->s_qf_names[type],
2354 .hash = 0,
2355 .len = strlen(EXT3_SB(sb)->s_qf_names[type])};
2356
2357 dentry = lookup_hash(&name, sb->s_root);
2358 if (IS_ERR(dentry))
2359 return PTR_ERR(dentry);
2360 err = vfs_quota_on_mount(type, EXT3_SB(sb)->s_jquota_fmt, dentry);
2361 /* Now invalidate and put the dentry - quota got its own reference
2362 * to inode and dentry has at least wrong hash so we had better
2363 * throw it away */
2364 d_invalidate(dentry);
2365 dput(dentry);
2366 return err;
2367} 2368}
2368 2369
2369/* 2370/*
@@ -2375,6 +2376,8 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2375 int err; 2376 int err;
2376 struct nameidata nd; 2377 struct nameidata nd;
2377 2378
2379 if (!test_opt(sb, QUOTA))
2380 return -EINVAL;
2378 /* Not journalling quota? */ 2381 /* Not journalling quota? */
2379 if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && 2382 if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] &&
2380 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) 2383 !EXT3_SB(sb)->s_qf_names[GRPQUOTA])
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 4cbc6d0212d3..3f9dfa643b19 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -1044,7 +1044,7 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name,
1044 int error, retries = 0; 1044 int error, retries = 0;
1045 1045
1046retry: 1046retry:
1047 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS); 1047 handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
1048 if (IS_ERR(handle)) { 1048 if (IS_ERR(handle)) {
1049 error = PTR_ERR(handle); 1049 error = PTR_ERR(handle);
1050 } else { 1050 } else {
diff --git a/fs/file_table.c b/fs/file_table.c
index 03d83cb686b1..fa7849fae134 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -63,42 +63,45 @@ static inline void file_free(struct file *f)
63 */ 63 */
64struct file *get_empty_filp(void) 64struct file *get_empty_filp(void)
65{ 65{
66static int old_max; 66 static int old_max;
67 struct file * f; 67 struct file * f;
68 68
69 /* 69 /*
70 * Privileged users can go above max_files 70 * Privileged users can go above max_files
71 */ 71 */
72 if (files_stat.nr_files < files_stat.max_files || 72 if (files_stat.nr_files >= files_stat.max_files &&
73 capable(CAP_SYS_ADMIN)) { 73 !capable(CAP_SYS_ADMIN))
74 f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); 74 goto over;
75 if (f) { 75
76 memset(f, 0, sizeof(*f)); 76 f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
77 if (security_file_alloc(f)) { 77 if (f == NULL)
78 file_free(f); 78 goto fail;
79 goto fail; 79
80 } 80 memset(f, 0, sizeof(*f));
81 eventpoll_init_file(f); 81 if (security_file_alloc(f))
82 atomic_set(&f->f_count, 1); 82 goto fail_sec;
83 f->f_uid = current->fsuid; 83
84 f->f_gid = current->fsgid; 84 eventpoll_init_file(f);
85 rwlock_init(&f->f_owner.lock); 85 atomic_set(&f->f_count, 1);
86 /* f->f_version: 0 */ 86 f->f_uid = current->fsuid;
87 INIT_LIST_HEAD(&f->f_list); 87 f->f_gid = current->fsgid;
88 f->f_maxcount = INT_MAX; 88 rwlock_init(&f->f_owner.lock);
89 return f; 89 /* f->f_version: 0 */
90 } 90 INIT_LIST_HEAD(&f->f_list);
91 } 91 f->f_maxcount = INT_MAX;
92 92 return f;
93
94over:
93 /* Ran out of filps - report that */ 95 /* Ran out of filps - report that */
94 if (files_stat.max_files >= old_max) { 96 if (files_stat.nr_files > old_max) {
95 printk(KERN_INFO "VFS: file-max limit %d reached\n", 97 printk(KERN_INFO "VFS: file-max limit %d reached\n",
96 files_stat.max_files); 98 files_stat.max_files);
97 old_max = files_stat.max_files; 99 old_max = files_stat.nr_files;
98 } else {
99 /* Big problems... */
100 printk(KERN_WARNING "VFS: filp allocation failed\n");
101 } 100 }
101 goto fail;
102
103fail_sec:
104 file_free(f);
102fail: 105fail:
103 return NULL; 106 return NULL;
104} 107}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8e050fa58218..e94ab398b717 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -485,32 +485,6 @@ static void set_sb_syncing(int val)
485 spin_unlock(&sb_lock); 485 spin_unlock(&sb_lock);
486} 486}
487 487
488/*
489 * Find a superblock with inodes that need to be synced
490 */
491static struct super_block *get_super_to_sync(void)
492{
493 struct super_block *sb;
494restart:
495 spin_lock(&sb_lock);
496 sb = sb_entry(super_blocks.prev);
497 for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.prev)) {
498 if (sb->s_syncing)
499 continue;
500 sb->s_syncing = 1;
501 sb->s_count++;
502 spin_unlock(&sb_lock);
503 down_read(&sb->s_umount);
504 if (!sb->s_root) {
505 drop_super(sb);
506 goto restart;
507 }
508 return sb;
509 }
510 spin_unlock(&sb_lock);
511 return NULL;
512}
513
514/** 488/**
515 * sync_inodes - writes all inodes to disk 489 * sync_inodes - writes all inodes to disk
516 * @wait: wait for completion 490 * @wait: wait for completion
@@ -530,23 +504,39 @@ restart:
530 * outstanding dirty inodes, the writeback goes block-at-a-time within the 504 * outstanding dirty inodes, the writeback goes block-at-a-time within the
531 * filesystem's write_inode(). This is extremely slow. 505 * filesystem's write_inode(). This is extremely slow.
532 */ 506 */
533void sync_inodes(int wait) 507static void __sync_inodes(int wait)
534{ 508{
535 struct super_block *sb; 509 struct super_block *sb;
536 510
537 set_sb_syncing(0); 511 spin_lock(&sb_lock);
538 while ((sb = get_super_to_sync()) != NULL) { 512restart:
539 sync_inodes_sb(sb, 0); 513 list_for_each_entry(sb, &super_blocks, s_list) {
540 sync_blockdev(sb->s_bdev); 514 if (sb->s_syncing)
541 drop_super(sb); 515 continue;
516 sb->s_syncing = 1;
517 sb->s_count++;
518 spin_unlock(&sb_lock);
519 down_read(&sb->s_umount);
520 if (sb->s_root) {
521 sync_inodes_sb(sb, wait);
522 sync_blockdev(sb->s_bdev);
523 }
524 up_read(&sb->s_umount);
525 spin_lock(&sb_lock);
526 if (__put_super_and_need_restart(sb))
527 goto restart;
542 } 528 }
529 spin_unlock(&sb_lock);
530}
531
532void sync_inodes(int wait)
533{
534 set_sb_syncing(0);
535 __sync_inodes(0);
536
543 if (wait) { 537 if (wait) {
544 set_sb_syncing(0); 538 set_sb_syncing(0);
545 while ((sb = get_super_to_sync()) != NULL) { 539 __sync_inodes(1);
546 sync_inodes_sb(sb, 1);
547 sync_blockdev(sb->s_bdev);
548 drop_super(sb);
549 }
550 } 540 }
551} 541}
552 542
diff --git a/fs/inode.c b/fs/inode.c
index 801fe7f36280..1f9a3a2b89bc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -500,7 +500,7 @@ repeat:
500 continue; 500 continue;
501 if (!test(inode, data)) 501 if (!test(inode, data))
502 continue; 502 continue;
503 if (inode->i_state & (I_FREEING|I_CLEAR)) { 503 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
504 __wait_on_freeing_inode(inode); 504 __wait_on_freeing_inode(inode);
505 goto repeat; 505 goto repeat;
506 } 506 }
@@ -525,7 +525,7 @@ repeat:
525 continue; 525 continue;
526 if (inode->i_sb != sb) 526 if (inode->i_sb != sb)
527 continue; 527 continue;
528 if (inode->i_state & (I_FREEING|I_CLEAR)) { 528 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
529 __wait_on_freeing_inode(inode); 529 __wait_on_freeing_inode(inode);
530 goto repeat; 530 goto repeat;
531 } 531 }
@@ -727,7 +727,7 @@ EXPORT_SYMBOL(iunique);
727struct inode *igrab(struct inode *inode) 727struct inode *igrab(struct inode *inode)
728{ 728{
729 spin_lock(&inode_lock); 729 spin_lock(&inode_lock);
730 if (!(inode->i_state & I_FREEING)) 730 if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
731 __iget(inode); 731 __iget(inode);
732 else 732 else
733 /* 733 /*
@@ -1024,17 +1024,21 @@ static void generic_forget_inode(struct inode *inode)
1024 if (!(inode->i_state & (I_DIRTY|I_LOCK))) 1024 if (!(inode->i_state & (I_DIRTY|I_LOCK)))
1025 list_move(&inode->i_list, &inode_unused); 1025 list_move(&inode->i_list, &inode_unused);
1026 inodes_stat.nr_unused++; 1026 inodes_stat.nr_unused++;
1027 spin_unlock(&inode_lock); 1027 if (!sb || (sb->s_flags & MS_ACTIVE)) {
1028 if (!sb || (sb->s_flags & MS_ACTIVE)) 1028 spin_unlock(&inode_lock);
1029 return; 1029 return;
1030 }
1031 inode->i_state |= I_WILL_FREE;
1032 spin_unlock(&inode_lock);
1030 write_inode_now(inode, 1); 1033 write_inode_now(inode, 1);
1031 spin_lock(&inode_lock); 1034 spin_lock(&inode_lock);
1035 inode->i_state &= ~I_WILL_FREE;
1032 inodes_stat.nr_unused--; 1036 inodes_stat.nr_unused--;
1033 hlist_del_init(&inode->i_hash); 1037 hlist_del_init(&inode->i_hash);
1034 } 1038 }
1035 list_del_init(&inode->i_list); 1039 list_del_init(&inode->i_list);
1036 list_del_init(&inode->i_sb_list); 1040 list_del_init(&inode->i_sb_list);
1037 inode->i_state|=I_FREEING; 1041 inode->i_state |= I_FREEING;
1038 inodes_stat.nr_inodes--; 1042 inodes_stat.nr_inodes--;
1039 spin_unlock(&inode_lock); 1043 spin_unlock(&inode_lock);
1040 if (inode->i_data.nrpages) 1044 if (inode->i_data.nrpages)
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 30a2bf9eeda5..e892dab40c26 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -21,6 +21,7 @@
21#include <linux/sched.h> 21#include <linux/sched.h>
22#include <linux/fs.h> 22#include <linux/fs.h>
23#include <linux/quotaops.h> 23#include <linux/quotaops.h>
24#include <linux/posix_acl_xattr.h>
24#include "jfs_incore.h" 25#include "jfs_incore.h"
25#include "jfs_xattr.h" 26#include "jfs_xattr.h"
26#include "jfs_acl.h" 27#include "jfs_acl.h"
@@ -36,11 +37,11 @@ static struct posix_acl *jfs_get_acl(struct inode *inode, int type)
36 37
37 switch(type) { 38 switch(type) {
38 case ACL_TYPE_ACCESS: 39 case ACL_TYPE_ACCESS:
39 ea_name = XATTR_NAME_ACL_ACCESS; 40 ea_name = POSIX_ACL_XATTR_ACCESS;
40 p_acl = &ji->i_acl; 41 p_acl = &ji->i_acl;
41 break; 42 break;
42 case ACL_TYPE_DEFAULT: 43 case ACL_TYPE_DEFAULT:
43 ea_name = XATTR_NAME_ACL_DEFAULT; 44 ea_name = POSIX_ACL_XATTR_DEFAULT;
44 p_acl = &ji->i_default_acl; 45 p_acl = &ji->i_default_acl;
45 break; 46 break;
46 default: 47 default:
@@ -88,11 +89,11 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
88 89
89 switch(type) { 90 switch(type) {
90 case ACL_TYPE_ACCESS: 91 case ACL_TYPE_ACCESS:
91 ea_name = XATTR_NAME_ACL_ACCESS; 92 ea_name = POSIX_ACL_XATTR_ACCESS;
92 p_acl = &ji->i_acl; 93 p_acl = &ji->i_acl;
93 break; 94 break;
94 case ACL_TYPE_DEFAULT: 95 case ACL_TYPE_DEFAULT:
95 ea_name = XATTR_NAME_ACL_DEFAULT; 96 ea_name = POSIX_ACL_XATTR_DEFAULT;
96 p_acl = &ji->i_default_acl; 97 p_acl = &ji->i_default_acl;
97 if (!S_ISDIR(inode->i_mode)) 98 if (!S_ISDIR(inode->i_mode))
98 return acl ? -EACCES : 0; 99 return acl ? -EACCES : 0;
@@ -101,7 +102,7 @@ static int jfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
101 return -EINVAL; 102 return -EINVAL;
102 } 103 }
103 if (acl) { 104 if (acl) {
104 size = xattr_acl_size(acl->a_count); 105 size = posix_acl_xattr_size(acl->a_count);
105 value = kmalloc(size, GFP_KERNEL); 106 value = kmalloc(size, GFP_KERNEL);
106 if (!value) 107 if (!value)
107 return -ENOMEM; 108 return -ENOMEM;
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index d2ae430adecf..a3acd3eec059 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,8 +20,6 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23#include <linux/xattr_acl.h>
24
25int jfs_permission(struct inode *, int, struct nameidata *); 23int jfs_permission(struct inode *, int, struct nameidata *);
26int jfs_init_acl(struct inode *, struct inode *); 24int jfs_init_acl(struct inode *, struct inode *);
27int jfs_setattr(struct dentry *, struct iattr *); 25int jfs_setattr(struct dentry *, struct iattr *);
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 810a3653d8b3..ee32211288ce 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -24,6 +24,7 @@
24#include <linux/completion.h> 24#include <linux/completion.h>
25#include <linux/vfs.h> 25#include <linux/vfs.h>
26#include <linux/moduleparam.h> 26#include <linux/moduleparam.h>
27#include <linux/posix_acl.h>
27#include <asm/uaccess.h> 28#include <asm/uaccess.h>
28 29
29#include "jfs_incore.h" 30#include "jfs_incore.h"
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 6016373701a3..ee438d429d45 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -19,6 +19,7 @@
19 19
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/xattr.h> 21#include <linux/xattr.h>
22#include <linux/posix_acl_xattr.h>
22#include <linux/quotaops.h> 23#include <linux/quotaops.h>
23#include "jfs_incore.h" 24#include "jfs_incore.h"
24#include "jfs_superblock.h" 25#include "jfs_superblock.h"
@@ -718,9 +719,9 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
718 return -EPERM; 719 return -EPERM;
719 720
720 /* 721 /*
721 * XATTR_NAME_ACL_ACCESS is tied to i_mode 722 * POSIX_ACL_XATTR_ACCESS is tied to i_mode
722 */ 723 */
723 if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) { 724 if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
724 acl = posix_acl_from_xattr(value, value_len); 725 acl = posix_acl_from_xattr(value, value_len);
725 if (IS_ERR(acl)) { 726 if (IS_ERR(acl)) {
726 rc = PTR_ERR(acl); 727 rc = PTR_ERR(acl);
@@ -750,7 +751,7 @@ static int can_set_system_xattr(struct inode *inode, const char *name,
750 JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED; 751 JFS_IP(inode)->i_acl = JFS_ACL_NOT_CACHED;
751 752
752 return 0; 753 return 0;
753 } else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) { 754 } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
754 acl = posix_acl_from_xattr(value, value_len); 755 acl = posix_acl_from_xattr(value, value_len);
755 if (IS_ERR(acl)) { 756 if (IS_ERR(acl)) {
756 rc = PTR_ERR(acl); 757 rc = PTR_ERR(acl);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b82e470912e8..6e242556b903 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -191,7 +191,9 @@ lockd(struct svc_rqst *rqstp)
191 printk(KERN_DEBUG 191 printk(KERN_DEBUG
192 "lockd: new process, skipping host shutdown\n"); 192 "lockd: new process, skipping host shutdown\n");
193 wake_up(&lockd_exit); 193 wake_up(&lockd_exit);
194 194
195 flush_signals(current);
196
195 /* Exit the RPC thread */ 197 /* Exit the RPC thread */
196 svc_exit_thread(rqstp); 198 svc_exit_thread(rqstp);
197 199
diff --git a/fs/namei.c b/fs/namei.c
index a7f7f44119b3..fa8df81ce8ca 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1577,19 +1577,35 @@ do_link:
1577 * 1577 *
1578 * Simple function to lookup and return a dentry and create it 1578 * Simple function to lookup and return a dentry and create it
1579 * if it doesn't exist. Is SMP-safe. 1579 * if it doesn't exist. Is SMP-safe.
1580 *
1581 * Returns with nd->dentry->d_inode->i_sem locked.
1580 */ 1582 */
1581struct dentry *lookup_create(struct nameidata *nd, int is_dir) 1583struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1582{ 1584{
1583 struct dentry *dentry; 1585 struct dentry *dentry = ERR_PTR(-EEXIST);
1584 1586
1585 down(&nd->dentry->d_inode->i_sem); 1587 down(&nd->dentry->d_inode->i_sem);
1586 dentry = ERR_PTR(-EEXIST); 1588 /*
1589 * Yucky last component or no last component at all?
1590 * (foo/., foo/.., /////)
1591 */
1587 if (nd->last_type != LAST_NORM) 1592 if (nd->last_type != LAST_NORM)
1588 goto fail; 1593 goto fail;
1589 nd->flags &= ~LOOKUP_PARENT; 1594 nd->flags &= ~LOOKUP_PARENT;
1595
1596 /*
1597 * Do the final lookup.
1598 */
1590 dentry = lookup_hash(&nd->last, nd->dentry); 1599 dentry = lookup_hash(&nd->last, nd->dentry);
1591 if (IS_ERR(dentry)) 1600 if (IS_ERR(dentry))
1592 goto fail; 1601 goto fail;
1602
1603 /*
1604 * Special case - lookup gave negative, but... we had foo/bar/
1605 * From the vfs_mknod() POV we just have a negative dentry -
1606 * all is fine. Let's be bastards - you had / on the end, you've
1607 * been asking for (non-existent) directory. -ENOENT for you.
1608 */
1593 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) 1609 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1594 goto enoent; 1610 goto enoent;
1595 return dentry; 1611 return dentry;
diff --git a/fs/namespace.c b/fs/namespace.c
index 3b93e5d750eb..208c079e9fdb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -337,7 +337,7 @@ int may_umount(struct vfsmount *mnt)
337 337
338EXPORT_SYMBOL(may_umount); 338EXPORT_SYMBOL(may_umount);
339 339
340void umount_tree(struct vfsmount *mnt) 340static void umount_tree(struct vfsmount *mnt)
341{ 341{
342 struct vfsmount *p; 342 struct vfsmount *p;
343 LIST_HEAD(kill); 343 LIST_HEAD(kill);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d6a30c844de3..6537f2c4ae44 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -751,11 +751,6 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
751 retval = -EFAULT; 751 retval = -EFAULT;
752 if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len)) 752 if (!access_ok(VERIFY_READ, iov.iov_base, iov.iov_len))
753 goto out; 753 goto out;
754 if (file->f_error) {
755 retval = file->f_error;
756 file->f_error = 0;
757 goto out;
758 }
759 retval = -EFBIG; 754 retval = -EFBIG;
760 if (limit != RLIM_INFINITY) { 755 if (limit != RLIM_INFINITY) {
761 if (pos >= limit) { 756 if (pos >= limit) {
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9f043f44c92f..ce341dc76d5e 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -10,5 +10,5 @@ nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
10nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o 10nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
11nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o 11nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
12nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ 12nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
13 nfs4acl.o nfs4callback.o 13 nfs4acl.o nfs4callback.o nfs4recover.o
14nfsd-objs := $(nfsd-y) 14nfsd-objs := $(nfsd-y)
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 11ebf6c4aa54..4a2105552ac4 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -125,7 +125,7 @@ static short ace2type(struct nfs4_ace *);
125static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int); 125static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int);
126static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int); 126static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int);
127int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t); 127int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
128int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *); 128static int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
129 129
130struct nfs4_acl * 130struct nfs4_acl *
131nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl, 131nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
@@ -775,7 +775,7 @@ out_err:
775 return pacl; 775 return pacl;
776} 776}
777 777
778int 778static int
779nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) 779nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
780{ 780{
781 struct list_head *h, *n; 781 struct list_head *h, *n;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 634465e9cfc6..583c0710e45e 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -54,7 +54,6 @@
54 54
55/* declarations */ 55/* declarations */
56static void nfs4_cb_null(struct rpc_task *task); 56static void nfs4_cb_null(struct rpc_task *task);
57extern spinlock_t recall_lock;
58 57
59/* Index of predefined Linux callback client operations */ 58/* Index of predefined Linux callback client operations */
60 59
@@ -329,12 +328,12 @@ out:
329 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \ 328 .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
330} 329}
331 330
332struct rpc_procinfo nfs4_cb_procedures[] = { 331static struct rpc_procinfo nfs4_cb_procedures[] = {
333 PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null), 332 PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null),
334 PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall), 333 PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall),
335}; 334};
336 335
337struct rpc_version nfs_cb_version4 = { 336static struct rpc_version nfs_cb_version4 = {
338 .number = 1, 337 .number = 1,
339 .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]), 338 .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
340 .procs = nfs4_cb_procedures 339 .procs = nfs4_cb_procedures
@@ -348,7 +347,7 @@ static struct rpc_version * nfs_cb_version[] = {
348/* 347/*
349 * Use the SETCLIENTID credential 348 * Use the SETCLIENTID credential
350 */ 349 */
351struct rpc_cred * 350static struct rpc_cred *
352nfsd4_lookupcred(struct nfs4_client *clp, int taskflags) 351nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
353{ 352{
354 struct auth_cred acred; 353 struct auth_cred acred;
@@ -387,9 +386,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
387 char hostname[32]; 386 char hostname[32];
388 int status; 387 int status;
389 388
390 dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n", 389 if (atomic_read(&cb->cb_set))
391 cb->cb_parsed, atomic_read(&cb->cb_set));
392 if (!cb->cb_parsed || atomic_read(&cb->cb_set))
393 return; 390 return;
394 391
395 /* Initialize address */ 392 /* Initialize address */
@@ -427,7 +424,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
427 * XXX AUTH_UNIX only - need AUTH_GSS.... 424 * XXX AUTH_UNIX only - need AUTH_GSS....
428 */ 425 */
429 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr)); 426 sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
430 clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX); 427 clnt = rpc_new_client(xprt, hostname, program, 1, RPC_AUTH_UNIX);
431 if (IS_ERR(clnt)) { 428 if (IS_ERR(clnt)) {
432 dprintk("NFSD: couldn't create callback client\n"); 429 dprintk("NFSD: couldn't create callback client\n");
433 goto out_err; 430 goto out_err;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 4ba540841cf6..5605a26efc57 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -104,7 +104,7 @@ ent_update(struct ent *new, struct ent *itm)
104 ent_init(new, itm); 104 ent_init(new, itm);
105} 105}
106 106
107void 107static void
108ent_put(struct cache_head *ch, struct cache_detail *cd) 108ent_put(struct cache_head *ch, struct cache_detail *cd)
109{ 109{
110 if (cache_put(ch, cd)) { 110 if (cache_put(ch, cd)) {
@@ -186,7 +186,7 @@ warn_no_idmapd(struct cache_detail *detail)
186static int idtoname_parse(struct cache_detail *, char *, int); 186static int idtoname_parse(struct cache_detail *, char *, int);
187static struct ent *idtoname_lookup(struct ent *, int); 187static struct ent *idtoname_lookup(struct ent *, int);
188 188
189struct cache_detail idtoname_cache = { 189static struct cache_detail idtoname_cache = {
190 .hash_size = ENT_HASHMAX, 190 .hash_size = ENT_HASHMAX,
191 .hash_table = idtoname_table, 191 .hash_table = idtoname_table,
192 .name = "nfs4.idtoname", 192 .name = "nfs4.idtoname",
@@ -277,7 +277,7 @@ nametoid_hash(struct ent *ent)
277 return hash_str(ent->name, ENT_HASHBITS); 277 return hash_str(ent->name, ENT_HASHBITS);
278} 278}
279 279
280void 280static void
281nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, 281nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
282 int *blen) 282 int *blen)
283{ 283{
@@ -317,9 +317,9 @@ nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
317} 317}
318 318
319static struct ent *nametoid_lookup(struct ent *, int); 319static struct ent *nametoid_lookup(struct ent *, int);
320int nametoid_parse(struct cache_detail *, char *, int); 320static int nametoid_parse(struct cache_detail *, char *, int);
321 321
322struct cache_detail nametoid_cache = { 322static struct cache_detail nametoid_cache = {
323 .hash_size = ENT_HASHMAX, 323 .hash_size = ENT_HASHMAX,
324 .hash_table = nametoid_table, 324 .hash_table = nametoid_table,
325 .name = "nfs4.nametoid", 325 .name = "nfs4.nametoid",
@@ -330,7 +330,7 @@ struct cache_detail nametoid_cache = {
330 .warn_no_listener = warn_no_idmapd, 330 .warn_no_listener = warn_no_idmapd,
331}; 331};
332 332
333int 333static int
334nametoid_parse(struct cache_detail *cd, char *buf, int buflen) 334nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
335{ 335{
336 struct ent ent, *res; 336 struct ent ent, *res;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e8158741e8b5..d71f14517b9c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -45,6 +45,7 @@
45#include <linux/param.h> 45#include <linux/param.h>
46#include <linux/major.h> 46#include <linux/major.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/file.h>
48 49
49#include <linux/sunrpc/svc.h> 50#include <linux/sunrpc/svc.h>
50#include <linux/nfsd/nfsd.h> 51#include <linux/nfsd/nfsd.h>
@@ -198,6 +199,11 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
198 if (status) 199 if (status)
199 goto out; 200 goto out;
200 switch (open->op_claim_type) { 201 switch (open->op_claim_type) {
202 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
203 status = nfserr_inval;
204 if (open->op_create)
205 goto out;
206 /* fall through */
201 case NFS4_OPEN_CLAIM_NULL: 207 case NFS4_OPEN_CLAIM_NULL:
202 /* 208 /*
203 * (1) set CURRENT_FH to the file being opened, 209 * (1) set CURRENT_FH to the file being opened,
@@ -220,7 +226,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open
220 if (status) 226 if (status)
221 goto out; 227 goto out;
222 break; 228 break;
223 case NFS4_OPEN_CLAIM_DELEGATE_CUR:
224 case NFS4_OPEN_CLAIM_DELEGATE_PREV: 229 case NFS4_OPEN_CLAIM_DELEGATE_PREV:
225 printk("NFSD: unsupported OPEN claim type %d\n", 230 printk("NFSD: unsupported OPEN claim type %d\n",
226 open->op_claim_type); 231 open->op_claim_type);
@@ -473,26 +478,27 @@ static inline int
473nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read) 478nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
474{ 479{
475 int status; 480 int status;
476 struct file *filp = NULL;
477 481
478 /* no need to check permission - this will be done in nfsd_read() */ 482 /* no need to check permission - this will be done in nfsd_read() */
479 483
484 read->rd_filp = NULL;
480 if (read->rd_offset >= OFFSET_MAX) 485 if (read->rd_offset >= OFFSET_MAX)
481 return nfserr_inval; 486 return nfserr_inval;
482 487
483 nfs4_lock_state(); 488 nfs4_lock_state();
484 /* check stateid */ 489 /* check stateid */
485 if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid, 490 if ((status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
486 CHECK_FH | RD_STATE, &filp))) { 491 CHECK_FH | RD_STATE, &read->rd_filp))) {
487 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n"); 492 dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
488 goto out; 493 goto out;
489 } 494 }
495 if (read->rd_filp)
496 get_file(read->rd_filp);
490 status = nfs_ok; 497 status = nfs_ok;
491out: 498out:
492 nfs4_unlock_state(); 499 nfs4_unlock_state();
493 read->rd_rqstp = rqstp; 500 read->rd_rqstp = rqstp;
494 read->rd_fhp = current_fh; 501 read->rd_fhp = current_fh;
495 read->rd_filp = filp;
496 return status; 502 return status;
497} 503}
498 504
@@ -532,6 +538,8 @@ nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_rem
532{ 538{
533 int status; 539 int status;
534 540
541 if (nfs4_in_grace())
542 return nfserr_grace;
535 status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen); 543 status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
536 if (status == nfserr_symlink) 544 if (status == nfserr_symlink)
537 return nfserr_notdir; 545 return nfserr_notdir;
@@ -550,6 +558,9 @@ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
550 558
551 if (!save_fh->fh_dentry) 559 if (!save_fh->fh_dentry)
552 return status; 560 return status;
561 if (nfs4_in_grace() && !(save_fh->fh_export->ex_flags
562 & NFSEXP_NOSUBTREECHECK))
563 return nfserr_grace;
553 status = nfsd_rename(rqstp, save_fh, rename->rn_sname, 564 status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
554 rename->rn_snamelen, current_fh, 565 rename->rn_snamelen, current_fh,
555 rename->rn_tname, rename->rn_tnamelen); 566 rename->rn_tname, rename->rn_tnamelen);
@@ -624,6 +635,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
624 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n"); 635 dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
625 goto out; 636 goto out;
626 } 637 }
638 if (filp)
639 get_file(filp);
627 nfs4_unlock_state(); 640 nfs4_unlock_state();
628 641
629 write->wr_bytes_written = write->wr_buflen; 642 write->wr_bytes_written = write->wr_buflen;
@@ -635,6 +648,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ
635 status = nfsd_write(rqstp, current_fh, filp, write->wr_offset, 648 status = nfsd_write(rqstp, current_fh, filp, write->wr_offset,
636 write->wr_vec, write->wr_vlen, write->wr_buflen, 649 write->wr_vec, write->wr_vlen, write->wr_buflen,
637 &write->wr_how_written); 650 &write->wr_how_written);
651 if (filp)
652 fput(filp);
638 653
639 if (status == nfserr_symlink) 654 if (status == nfserr_symlink)
640 status = nfserr_inval; 655 status = nfserr_inval;
@@ -923,6 +938,9 @@ encode_op:
923 nfs4_put_stateowner(replay_owner); 938 nfs4_put_stateowner(replay_owner);
924 replay_owner = NULL; 939 replay_owner = NULL;
925 } 940 }
941 /* XXX Ugh, we need to get rid of this kind of special case: */
942 if (op->opnum == OP_READ && op->u.read.rd_filp)
943 fput(op->u.read.rd_filp);
926 } 944 }
927 945
928out: 946out:
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
new file mode 100644
index 000000000000..095f1740f3ae
--- /dev/null
+++ b/fs/nfsd/nfs4recover.c
@@ -0,0 +1,431 @@
1/*
2* linux/fs/nfsd/nfs4recover.c
3*
4* Copyright (c) 2004 The Regents of the University of Michigan.
5* All rights reserved.
6*
7* Andy Adamson <andros@citi.umich.edu>
8*
9* Redistribution and use in source and binary forms, with or without
10* modification, are permitted provided that the following conditions
11* are met:
12*
13* 1. Redistributions of source code must retain the above copyright
14* notice, this list of conditions and the following disclaimer.
15* 2. Redistributions in binary form must reproduce the above copyright
16* notice, this list of conditions and the following disclaimer in the
17* documentation and/or other materials provided with the distribution.
18* 3. Neither the name of the University nor the names of its
19* contributors may be used to endorse or promote products derived
20* from this software without specific prior written permission.
21*
22* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
23* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33*
34*/
35
36
37#include <linux/sunrpc/svc.h>
38#include <linux/nfsd/nfsd.h>
39#include <linux/nfs4.h>
40#include <linux/nfsd/state.h>
41#include <linux/nfsd/xdr4.h>
42#include <linux/param.h>
43#include <linux/file.h>
44#include <linux/namei.h>
45#include <asm/uaccess.h>
46#include <asm/scatterlist.h>
47#include <linux/crypto.h>
48
49
50#define NFSDDBG_FACILITY NFSDDBG_PROC
51
52/* Globals */
53static struct nameidata rec_dir;
54static int rec_dir_init = 0;
55
56static void
57nfs4_save_user(uid_t *saveuid, gid_t *savegid)
58{
59 *saveuid = current->fsuid;
60 *savegid = current->fsgid;
61 current->fsuid = 0;
62 current->fsgid = 0;
63}
64
65static void
66nfs4_reset_user(uid_t saveuid, gid_t savegid)
67{
68 current->fsuid = saveuid;
69 current->fsgid = savegid;
70}
71
/*
 * Render a 16-byte digest as 32 lowercase hexadecimal digits.
 *
 * @out: destination buffer; 33 bytes are written (32 digits plus the
 *       terminating NUL).
 * @md5: the 16 raw digest bytes.  Only read, hence const.
 */
static void
md5_to_hex(char *out, const char *md5)
{
	static const char hexdigits[] = "0123456789abcdef";
	int i;

	for (i = 0; i < 16; i++) {
		/* via unsigned char so bytes >= 0x80 are not sign-extended */
		unsigned char c = md5[i];

		*out++ = hexdigits[c >> 4];
		*out++ = hexdigits[c & 0x0f];
	}
	*out = '\0';
}
85
86int
87nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname)
88{
89 struct xdr_netobj cksum;
90 struct crypto_tfm *tfm;
91 struct scatterlist sg[1];
92 int status = nfserr_resource;
93
94 dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
95 clname->len, clname->data);
96 tfm = crypto_alloc_tfm("md5", 0);
97 if (tfm == NULL)
98 goto out;
99 cksum.len = crypto_tfm_alg_digestsize(tfm);
100 cksum.data = kmalloc(cksum.len, GFP_KERNEL);
101 if (cksum.data == NULL)
102 goto out;
103 crypto_digest_init(tfm);
104
105 sg[0].page = virt_to_page(clname->data);
106 sg[0].offset = offset_in_page(clname->data);
107 sg[0].length = clname->len;
108
109 crypto_digest_update(tfm, sg, 1);
110 crypto_digest_final(tfm, cksum.data);
111
112 md5_to_hex(dname, cksum.data);
113
114 kfree(cksum.data);
115 status = nfs_ok;
116out:
117 if (tfm)
118 crypto_free_tfm(tfm);
119 return status;
120}
121
122static int
123nfsd4_rec_fsync(struct dentry *dentry)
124{
125 struct file *filp;
126 int status = nfs_ok;
127
128 dprintk("NFSD: nfs4_fsync_rec_dir\n");
129 filp = dentry_open(dget(dentry), mntget(rec_dir.mnt), O_RDWR);
130 if (IS_ERR(filp)) {
131 status = PTR_ERR(filp);
132 goto out;
133 }
134 if (filp->f_op && filp->f_op->fsync)
135 status = filp->f_op->fsync(filp, filp->f_dentry, 0);
136 fput(filp);
137out:
138 if (status)
139 printk("nfsd4: unable to sync recovery directory\n");
140 return status;
141}
142
143int
144nfsd4_create_clid_dir(struct nfs4_client *clp)
145{
146 char *dname = clp->cl_recdir;
147 struct dentry *dentry;
148 uid_t uid;
149 gid_t gid;
150 int status;
151
152 dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
153
154 if (!rec_dir_init || clp->cl_firststate)
155 return 0;
156
157 nfs4_save_user(&uid, &gid);
158
159 /* lock the parent */
160 down(&rec_dir.dentry->d_inode->i_sem);
161
162 dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
163 if (IS_ERR(dentry)) {
164 status = PTR_ERR(dentry);
165 goto out_unlock;
166 }
167 status = -EEXIST;
168 if (dentry->d_inode) {
169 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
170 goto out_put;
171 }
172 status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
173out_put:
174 dput(dentry);
175out_unlock:
176 up(&rec_dir.dentry->d_inode->i_sem);
177 if (status == 0) {
178 clp->cl_firststate = 1;
179 status = nfsd4_rec_fsync(rec_dir.dentry);
180 }
181 nfs4_reset_user(uid, gid);
182 dprintk("NFSD: nfsd4_create_clid_dir returns %d\n", status);
183 return status;
184}
185
186typedef int (recdir_func)(struct dentry *, struct dentry *);
187
188struct dentry_list {
189 struct dentry *dentry;
190 struct list_head list;
191};
192
193struct dentry_list_arg {
194 struct list_head dentries;
195 struct dentry *parent;
196};
197
198static int
199nfsd4_build_dentrylist(void *arg, const char *name, int namlen,
200 loff_t offset, ino_t ino, unsigned int d_type)
201{
202 struct dentry_list_arg *dla = arg;
203 struct list_head *dentries = &dla->dentries;
204 struct dentry *parent = dla->parent;
205 struct dentry *dentry;
206 struct dentry_list *child;
207
208 if (name && isdotent(name, namlen))
209 return nfs_ok;
210 dentry = lookup_one_len(name, parent, namlen);
211 if (IS_ERR(dentry))
212 return PTR_ERR(dentry);
213 child = kmalloc(sizeof(*child), GFP_KERNEL);
214 if (child == NULL)
215 return -ENOMEM;
216 child->dentry = dentry;
217 list_add(&child->list, dentries);
218 return 0;
219}
220
221static int
222nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
223{
224 struct file *filp;
225 struct dentry_list_arg dla = {
226 .parent = dir,
227 };
228 struct list_head *dentries = &dla.dentries;
229 struct dentry_list *child;
230 uid_t uid;
231 gid_t gid;
232 int status;
233
234 if (!rec_dir_init)
235 return 0;
236
237 nfs4_save_user(&uid, &gid);
238
239 filp = dentry_open(dget(dir), mntget(rec_dir.mnt),
240 O_RDWR);
241 status = PTR_ERR(filp);
242 if (IS_ERR(filp))
243 goto out;
244 INIT_LIST_HEAD(dentries);
245 status = vfs_readdir(filp, nfsd4_build_dentrylist, &dla);
246 fput(filp);
247 while (!list_empty(dentries)) {
248 child = list_entry(dentries->next, struct dentry_list, list);
249 status = f(dir, child->dentry);
250 if (status)
251 goto out;
252 list_del(&child->list);
253 dput(child->dentry);
254 kfree(child);
255 }
256out:
257 while (!list_empty(dentries)) {
258 child = list_entry(dentries->next, struct dentry_list, list);
259 list_del(&child->list);
260 dput(child->dentry);
261 kfree(child);
262 }
263 nfs4_reset_user(uid, gid);
264 return status;
265}
266
267static int
268nfsd4_remove_clid_file(struct dentry *dir, struct dentry *dentry)
269{
270 int status;
271
272 if (!S_ISREG(dir->d_inode->i_mode)) {
273 printk("nfsd4: non-file found in client recovery directory\n");
274 return -EINVAL;
275 }
276 down(&dir->d_inode->i_sem);
277 status = vfs_unlink(dir->d_inode, dentry);
278 up(&dir->d_inode->i_sem);
279 return status;
280}
281
282static int
283nfsd4_clear_clid_dir(struct dentry *dir, struct dentry *dentry)
284{
285 int status;
286
287 /* For now this directory should already be empty, but we empty it of
288 * any regular files anyway, just in case the directory was created by
289 * a kernel from the future.... */
290 nfsd4_list_rec_dir(dentry, nfsd4_remove_clid_file);
291 down(&dir->d_inode->i_sem);
292 status = vfs_rmdir(dir->d_inode, dentry);
293 up(&dir->d_inode->i_sem);
294 return status;
295}
296
297static int
298nfsd4_unlink_clid_dir(char *name, int namlen)
299{
300 struct dentry *dentry;
301 int status;
302
303 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
304
305 dentry = lookup_one_len(name, rec_dir.dentry, namlen);
306 if (IS_ERR(dentry)) {
307 status = PTR_ERR(dentry);
308 return status;
309 }
310 status = -ENOENT;
311 if (!dentry->d_inode)
312 goto out;
313
314 status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
315out:
316 dput(dentry);
317 return status;
318}
319
320void
321nfsd4_remove_clid_dir(struct nfs4_client *clp)
322{
323 uid_t uid;
324 gid_t gid;
325 int status;
326
327 if (!rec_dir_init || !clp->cl_firststate)
328 return;
329
330 nfs4_save_user(&uid, &gid);
331 status = nfsd4_unlink_clid_dir(clp->cl_recdir, HEXDIR_LEN-1);
332 nfs4_reset_user(uid, gid);
333 if (status == 0)
334 status = nfsd4_rec_fsync(rec_dir.dentry);
335 if (status)
336 printk("NFSD: Failed to remove expired client state directory"
337 " %.*s\n", HEXDIR_LEN, clp->cl_recdir);
338 return;
339}
340
341static int
342purge_old(struct dentry *parent, struct dentry *child)
343{
344 int status;
345
346 if (nfs4_has_reclaimed_state(child->d_name.name))
347 return nfs_ok;
348
349 status = nfsd4_clear_clid_dir(parent, child);
350 if (status)
351 printk("failed to remove client recovery directory %s\n",
352 child->d_name.name);
353 /* Keep trying, success or failure: */
354 return nfs_ok;
355}
356
357void
358nfsd4_recdir_purge_old(void) {
359 int status;
360
361 if (!rec_dir_init)
362 return;
363 status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
364 if (status == 0)
365 status = nfsd4_rec_fsync(rec_dir.dentry);
366 if (status)
367 printk("nfsd4: failed to purge old clients from recovery"
368 " directory %s\n", rec_dir.dentry->d_name.name);
369 return;
370}
371
372static int
373load_recdir(struct dentry *parent, struct dentry *child)
374{
375 if (child->d_name.len != HEXDIR_LEN - 1) {
376 printk("nfsd4: illegal name %s in recovery directory\n",
377 child->d_name.name);
378 /* Keep trying; maybe the others are OK: */
379 return nfs_ok;
380 }
381 nfs4_client_to_reclaim(child->d_name.name);
382 return nfs_ok;
383}
384
385int
386nfsd4_recdir_load(void) {
387 int status;
388
389 status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
390 if (status)
391 printk("nfsd4: failed loading clients from recovery"
392 " directory %s\n", rec_dir.dentry->d_name.name);
393 return status;
394}
395
396/*
397 * Hold reference to the recovery directory.
398 */
399
400void
401nfsd4_init_recdir(char *rec_dirname)
402{
403 uid_t uid = 0;
404 gid_t gid = 0;
405 int status;
406
407 printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
408 rec_dirname);
409
410 BUG_ON(rec_dir_init);
411
412 nfs4_save_user(&uid, &gid);
413
414 status = path_lookup(rec_dirname, LOOKUP_FOLLOW, &rec_dir);
415 if (status == -ENOENT)
416 printk("NFSD: recovery directory %s doesn't exist\n",
417 rec_dirname);
418
419 if (!status)
420 rec_dir_init = 1;
421 nfs4_reset_user(uid, gid);
422}
423
424void
425nfsd4_shutdown_recdir(void)
426{
427 if (!rec_dir_init)
428 return;
429 rec_dir_init = 0;
430 path_release(&rec_dir);
431}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 75e8b137580c..89e36526d7f2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -48,39 +48,32 @@
48#include <linux/nfs4.h> 48#include <linux/nfs4.h>
49#include <linux/nfsd/state.h> 49#include <linux/nfsd/state.h>
50#include <linux/nfsd/xdr4.h> 50#include <linux/nfsd/xdr4.h>
51#include <linux/namei.h>
51 52
52#define NFSDDBG_FACILITY NFSDDBG_PROC 53#define NFSDDBG_FACILITY NFSDDBG_PROC
53 54
54/* Globals */ 55/* Globals */
55static time_t lease_time = 90; /* default lease time */ 56static time_t lease_time = 90; /* default lease time */
56static time_t old_lease_time = 90; /* past incarnation lease time */ 57static time_t user_lease_time = 90;
57static u32 nfs4_reclaim_init = 0; 58static time_t boot_time;
58time_t boot_time; 59static int in_grace = 1;
59static time_t grace_end = 0;
60static u32 current_clientid = 1; 60static u32 current_clientid = 1;
61static u32 current_ownerid = 1; 61static u32 current_ownerid = 1;
62static u32 current_fileid = 1; 62static u32 current_fileid = 1;
63static u32 current_delegid = 1; 63static u32 current_delegid = 1;
64static u32 nfs4_init; 64static u32 nfs4_init;
65stateid_t zerostateid; /* bits all 0 */ 65static stateid_t zerostateid; /* bits all 0 */
66stateid_t onestateid; /* bits all 1 */ 66static stateid_t onestateid; /* bits all 1 */
67 67
68/* debug counters */ 68#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
69u32 list_add_perfile = 0; 69#define ONE_STATEID(stateid) (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
70u32 list_del_perfile = 0;
71u32 add_perclient = 0;
72u32 del_perclient = 0;
73u32 alloc_file = 0;
74u32 free_file = 0;
75u32 vfsopen = 0;
76u32 vfsclose = 0;
77u32 alloc_delegation= 0;
78u32 free_delegation= 0;
79 70
80/* forward declarations */ 71/* forward declarations */
81struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); 72static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
82static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); 73static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
83static void release_stateid_lockowners(struct nfs4_stateid *open_stp); 74static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
75static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
76static void nfs4_set_recdir(char *recdir);
84 77
85/* Locking: 78/* Locking:
86 * 79 *
@@ -90,6 +83,11 @@ static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
90 */ 83 */
91static DECLARE_MUTEX(client_sema); 84static DECLARE_MUTEX(client_sema);
92 85
86static kmem_cache_t *stateowner_slab = NULL;
87static kmem_cache_t *file_slab = NULL;
88static kmem_cache_t *stateid_slab = NULL;
89static kmem_cache_t *deleg_slab = NULL;
90
93void 91void
94nfs4_lock_state(void) 92nfs4_lock_state(void)
95{ 93{
@@ -118,16 +116,36 @@ opaque_hashval(const void *ptr, int nbytes)
118/* forward declarations */ 116/* forward declarations */
119static void release_stateowner(struct nfs4_stateowner *sop); 117static void release_stateowner(struct nfs4_stateowner *sop);
120static void release_stateid(struct nfs4_stateid *stp, int flags); 118static void release_stateid(struct nfs4_stateid *stp, int flags);
121static void release_file(struct nfs4_file *fp);
122 119
123/* 120/*
124 * Delegation state 121 * Delegation state
125 */ 122 */
126 123
127/* recall_lock protects the del_recall_lru */ 124/* recall_lock protects the del_recall_lru */
128spinlock_t recall_lock; 125static spinlock_t recall_lock = SPIN_LOCK_UNLOCKED;
129static struct list_head del_recall_lru; 126static struct list_head del_recall_lru;
130 127
128static void
129free_nfs4_file(struct kref *kref)
130{
131 struct nfs4_file *fp = container_of(kref, struct nfs4_file, fi_ref);
132 list_del(&fp->fi_hash);
133 iput(fp->fi_inode);
134 kmem_cache_free(file_slab, fp);
135}
136
137static inline void
138put_nfs4_file(struct nfs4_file *fi)
139{
140 kref_put(&fi->fi_ref, free_nfs4_file);
141}
142
143static inline void
144get_nfs4_file(struct nfs4_file *fi)
145{
146 kref_get(&fi->fi_ref);
147}
148
131static struct nfs4_delegation * 149static struct nfs4_delegation *
132alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type) 150alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
133{ 151{
@@ -136,13 +154,14 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
136 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback; 154 struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
137 155
138 dprintk("NFSD alloc_init_deleg\n"); 156 dprintk("NFSD alloc_init_deleg\n");
139 if ((dp = kmalloc(sizeof(struct nfs4_delegation), 157 dp = kmem_cache_alloc(deleg_slab, GFP_KERNEL);
140 GFP_KERNEL)) == NULL) 158 if (dp == NULL)
141 return dp; 159 return dp;
142 INIT_LIST_HEAD(&dp->dl_del_perfile); 160 INIT_LIST_HEAD(&dp->dl_perfile);
143 INIT_LIST_HEAD(&dp->dl_del_perclnt); 161 INIT_LIST_HEAD(&dp->dl_perclnt);
144 INIT_LIST_HEAD(&dp->dl_recall_lru); 162 INIT_LIST_HEAD(&dp->dl_recall_lru);
145 dp->dl_client = clp; 163 dp->dl_client = clp;
164 get_nfs4_file(fp);
146 dp->dl_file = fp; 165 dp->dl_file = fp;
147 dp->dl_flock = NULL; 166 dp->dl_flock = NULL;
148 get_file(stp->st_vfs_file); 167 get_file(stp->st_vfs_file);
@@ -160,9 +179,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
160 current_fh->fh_handle.fh_size); 179 current_fh->fh_handle.fh_size);
161 dp->dl_time = 0; 180 dp->dl_time = 0;
162 atomic_set(&dp->dl_count, 1); 181 atomic_set(&dp->dl_count, 1);
163 list_add(&dp->dl_del_perfile, &fp->fi_del_perfile); 182 list_add(&dp->dl_perfile, &fp->fi_delegations);
164 list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt); 183 list_add(&dp->dl_perclnt, &clp->cl_delegations);
165 alloc_delegation++;
166 return dp; 184 return dp;
167} 185}
168 186
@@ -171,8 +189,8 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
171{ 189{
172 if (atomic_dec_and_test(&dp->dl_count)) { 190 if (atomic_dec_and_test(&dp->dl_count)) {
173 dprintk("NFSD: freeing dp %p\n",dp); 191 dprintk("NFSD: freeing dp %p\n",dp);
174 kfree(dp); 192 put_nfs4_file(dp->dl_file);
175 free_delegation++; 193 kmem_cache_free(deleg_slab, dp);
176 } 194 }
177} 195}
178 196
@@ -193,15 +211,14 @@ nfs4_close_delegation(struct nfs4_delegation *dp)
193 if (dp->dl_flock) 211 if (dp->dl_flock)
194 setlease(filp, F_UNLCK, &dp->dl_flock); 212 setlease(filp, F_UNLCK, &dp->dl_flock);
195 nfsd_close(filp); 213 nfsd_close(filp);
196 vfsclose++;
197} 214}
198 215
199/* Called under the state lock. */ 216/* Called under the state lock. */
200static void 217static void
201unhash_delegation(struct nfs4_delegation *dp) 218unhash_delegation(struct nfs4_delegation *dp)
202{ 219{
203 list_del_init(&dp->dl_del_perfile); 220 list_del_init(&dp->dl_perfile);
204 list_del_init(&dp->dl_del_perclnt); 221 list_del_init(&dp->dl_perclnt);
205 spin_lock(&recall_lock); 222 spin_lock(&recall_lock);
206 list_del_init(&dp->dl_recall_lru); 223 list_del_init(&dp->dl_recall_lru);
207 spin_unlock(&recall_lock); 224 spin_unlock(&recall_lock);
@@ -220,8 +237,8 @@ unhash_delegation(struct nfs4_delegation *dp)
220 237
221#define clientid_hashval(id) \ 238#define clientid_hashval(id) \
222 ((id) & CLIENT_HASH_MASK) 239 ((id) & CLIENT_HASH_MASK)
223#define clientstr_hashval(name, namelen) \ 240#define clientstr_hashval(name) \
224 (opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK) 241 (opaque_hashval((name), 8) & CLIENT_HASH_MASK)
225/* 242/*
226 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot 243 * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
227 * used in reboot/reset lease grace period processing 244 * used in reboot/reset lease grace period processing
@@ -331,11 +348,11 @@ expire_client(struct nfs4_client *clp)
331 348
332 INIT_LIST_HEAD(&reaplist); 349 INIT_LIST_HEAD(&reaplist);
333 spin_lock(&recall_lock); 350 spin_lock(&recall_lock);
334 while (!list_empty(&clp->cl_del_perclnt)) { 351 while (!list_empty(&clp->cl_delegations)) {
335 dp = list_entry(clp->cl_del_perclnt.next, struct nfs4_delegation, dl_del_perclnt); 352 dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
336 dprintk("NFSD: expire client. dp %p, fp %p\n", dp, 353 dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
337 dp->dl_flock); 354 dp->dl_flock);
338 list_del_init(&dp->dl_del_perclnt); 355 list_del_init(&dp->dl_perclnt);
339 list_move(&dp->dl_recall_lru, &reaplist); 356 list_move(&dp->dl_recall_lru, &reaplist);
340 } 357 }
341 spin_unlock(&recall_lock); 358 spin_unlock(&recall_lock);
@@ -347,26 +364,26 @@ expire_client(struct nfs4_client *clp)
347 list_del(&clp->cl_idhash); 364 list_del(&clp->cl_idhash);
348 list_del(&clp->cl_strhash); 365 list_del(&clp->cl_strhash);
349 list_del(&clp->cl_lru); 366 list_del(&clp->cl_lru);
350 while (!list_empty(&clp->cl_perclient)) { 367 while (!list_empty(&clp->cl_openowners)) {
351 sop = list_entry(clp->cl_perclient.next, struct nfs4_stateowner, so_perclient); 368 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
352 release_stateowner(sop); 369 release_stateowner(sop);
353 } 370 }
354 put_nfs4_client(clp); 371 put_nfs4_client(clp);
355} 372}
356 373
357static struct nfs4_client * 374static struct nfs4_client *
358create_client(struct xdr_netobj name) { 375create_client(struct xdr_netobj name, char *recdir) {
359 struct nfs4_client *clp; 376 struct nfs4_client *clp;
360 377
361 if (!(clp = alloc_client(name))) 378 if (!(clp = alloc_client(name)))
362 goto out; 379 goto out;
380 memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
363 atomic_set(&clp->cl_count, 1); 381 atomic_set(&clp->cl_count, 1);
364 atomic_set(&clp->cl_callback.cb_set, 0); 382 atomic_set(&clp->cl_callback.cb_set, 0);
365 clp->cl_callback.cb_parsed = 0;
366 INIT_LIST_HEAD(&clp->cl_idhash); 383 INIT_LIST_HEAD(&clp->cl_idhash);
367 INIT_LIST_HEAD(&clp->cl_strhash); 384 INIT_LIST_HEAD(&clp->cl_strhash);
368 INIT_LIST_HEAD(&clp->cl_perclient); 385 INIT_LIST_HEAD(&clp->cl_openowners);
369 INIT_LIST_HEAD(&clp->cl_del_perclnt); 386 INIT_LIST_HEAD(&clp->cl_delegations);
370 INIT_LIST_HEAD(&clp->cl_lru); 387 INIT_LIST_HEAD(&clp->cl_lru);
371out: 388out:
372 return clp; 389 return clp;
@@ -392,11 +409,9 @@ copy_cred(struct svc_cred *target, struct svc_cred *source) {
392 get_group_info(target->cr_group_info); 409 get_group_info(target->cr_group_info);
393} 410}
394 411
395static int 412static inline int
396cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2) { 413same_name(const char *n1, const char *n2) {
397 if (!n1 || !n2) 414 return 0 == memcmp(n1, n2, HEXDIR_LEN);
398 return 0;
399 return((n1->len == n2->len) && !memcmp(n1->data, n2->data, n2->len));
400} 415}
401 416
402static int 417static int
@@ -446,7 +461,7 @@ check_name(struct xdr_netobj name) {
446 return 1; 461 return 1;
447} 462}
448 463
449void 464static void
450add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval) 465add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
451{ 466{
452 unsigned int idhashval; 467 unsigned int idhashval;
@@ -458,7 +473,7 @@ add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
458 clp->cl_time = get_seconds(); 473 clp->cl_time = get_seconds();
459} 474}
460 475
461void 476static void
462move_to_confirmed(struct nfs4_client *clp) 477move_to_confirmed(struct nfs4_client *clp)
463{ 478{
464 unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id); 479 unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
@@ -468,8 +483,7 @@ move_to_confirmed(struct nfs4_client *clp)
468 list_del_init(&clp->cl_strhash); 483 list_del_init(&clp->cl_strhash);
469 list_del_init(&clp->cl_idhash); 484 list_del_init(&clp->cl_idhash);
470 list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]); 485 list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
471 strhashval = clientstr_hashval(clp->cl_name.data, 486 strhashval = clientstr_hashval(clp->cl_recdir);
472 clp->cl_name.len);
473 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]); 487 list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
474 renew_client(clp); 488 renew_client(clp);
475} 489}
@@ -500,6 +514,30 @@ find_unconfirmed_client(clientid_t *clid)
500 return NULL; 514 return NULL;
501} 515}
502 516
517static struct nfs4_client *
518find_confirmed_client_by_str(const char *dname, unsigned int hashval)
519{
520 struct nfs4_client *clp;
521
522 list_for_each_entry(clp, &conf_str_hashtbl[hashval], cl_strhash) {
523 if (same_name(clp->cl_recdir, dname))
524 return clp;
525 }
526 return NULL;
527}
528
529static struct nfs4_client *
530find_unconfirmed_client_by_str(const char *dname, unsigned int hashval)
531{
532 struct nfs4_client *clp;
533
534 list_for_each_entry(clp, &unconf_str_hashtbl[hashval], cl_strhash) {
535 if (same_name(clp->cl_recdir, dname))
536 return clp;
537 }
538 return NULL;
539}
540
503/* a helper function for parse_callback */ 541/* a helper function for parse_callback */
504static int 542static int
505parse_octet(unsigned int *lenp, char **addrp) 543parse_octet(unsigned int *lenp, char **addrp)
@@ -534,7 +572,7 @@ parse_octet(unsigned int *lenp, char **addrp)
534} 572}
535 573
536/* parse and set the setclientid ipv4 callback address */ 574/* parse and set the setclientid ipv4 callback address */
537int 575static int
538parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) 576parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
539{ 577{
540 int temp = 0; 578 int temp = 0;
@@ -570,7 +608,7 @@ parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigne
570 return 1; 608 return 1;
571} 609}
572 610
573void 611static void
574gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) 612gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
575{ 613{
576 struct nfs4_callback *cb = &clp->cl_callback; 614 struct nfs4_callback *cb = &clp->cl_callback;
@@ -584,14 +622,12 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
584 goto out_err; 622 goto out_err;
585 cb->cb_prog = se->se_callback_prog; 623 cb->cb_prog = se->se_callback_prog;
586 cb->cb_ident = se->se_callback_ident; 624 cb->cb_ident = se->se_callback_ident;
587 cb->cb_parsed = 1;
588 return; 625 return;
589out_err: 626out_err:
590 printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " 627 printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
591 "will not receive delegations\n", 628 "will not receive delegations\n",
592 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 629 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
593 630
594 cb->cb_parsed = 0;
595 return; 631 return;
596} 632}
597 633
@@ -638,59 +674,43 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
638 }; 674 };
639 nfs4_verifier clverifier = setclid->se_verf; 675 nfs4_verifier clverifier = setclid->se_verf;
640 unsigned int strhashval; 676 unsigned int strhashval;
641 struct nfs4_client * conf, * unconf, * new, * clp; 677 struct nfs4_client *conf, *unconf, *new;
642 int status; 678 int status;
679 char dname[HEXDIR_LEN];
643 680
644 status = nfserr_inval; 681 status = nfserr_inval;
645 if (!check_name(clname)) 682 if (!check_name(clname))
646 goto out; 683 goto out;
647 684
685 status = nfs4_make_rec_clidname(dname, &clname);
686 if (status)
687 goto out;
688
648 /* 689 /*
649 * XXX The Duplicate Request Cache (DRC) has been checked (??) 690 * XXX The Duplicate Request Cache (DRC) has been checked (??)
650 * We get here on a DRC miss. 691 * We get here on a DRC miss.
651 */ 692 */
652 693
653 strhashval = clientstr_hashval(clname.data, clname.len); 694 strhashval = clientstr_hashval(dname);
654 695
655 conf = NULL;
656 nfs4_lock_state(); 696 nfs4_lock_state();
657 list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) { 697 conf = find_confirmed_client_by_str(dname, strhashval);
658 if (!cmp_name(&clp->cl_name, &clname)) 698 if (conf) {
659 continue;
660 /* 699 /*
661 * CASE 0: 700 * CASE 0:
662 * clname match, confirmed, different principal 701 * clname match, confirmed, different principal
663 * or different ip_address 702 * or different ip_address
664 */ 703 */
665 status = nfserr_clid_inuse; 704 status = nfserr_clid_inuse;
666 if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) { 705 if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred)
667 printk("NFSD: setclientid: string in use by client" 706 || conf->cl_addr != ip_addr) {
668 "(clientid %08x/%08x)\n",
669 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
670 goto out;
671 }
672 if (clp->cl_addr != ip_addr) {
673 printk("NFSD: setclientid: string in use by client" 707 printk("NFSD: setclientid: string in use by client"
674 "(clientid %08x/%08x)\n", 708 "(clientid %08x/%08x)\n",
675 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 709 conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id);
676 goto out; 710 goto out;
677 } 711 }
678
679 /*
680 * cl_name match from a previous SETCLIENTID operation
681 * XXX check for additional matches?
682 */
683 conf = clp;
684 break;
685 }
686 unconf = NULL;
687 list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
688 if (!cmp_name(&clp->cl_name, &clname))
689 continue;
690 /* cl_name match from a previous SETCLIENTID operation */
691 unconf = clp;
692 break;
693 } 712 }
713 unconf = find_unconfirmed_client_by_str(dname, strhashval);
694 status = nfserr_resource; 714 status = nfserr_resource;
695 if (!conf) { 715 if (!conf) {
696 /* 716 /*
@@ -699,7 +719,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
699 */ 719 */
700 if (unconf) 720 if (unconf)
701 expire_client(unconf); 721 expire_client(unconf);
702 if (!(new = create_client(clname))) 722 new = create_client(clname, dname);
723 if (new == NULL)
703 goto out; 724 goto out;
704 copy_verf(new, &clverifier); 725 copy_verf(new, &clverifier);
705 new->cl_addr = ip_addr; 726 new->cl_addr = ip_addr;
@@ -722,12 +743,16 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
722 * nfs4_client, but with the new callback info and a 743 * nfs4_client, but with the new callback info and a
723 * new cl_confirm 744 * new cl_confirm
724 */ 745 */
725 if ((unconf) && 746 if (unconf) {
726 cmp_verf(&unconf->cl_verifier, &conf->cl_verifier) && 747 /* Note this is removing unconfirmed {*x***},
727 cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) { 748 * which is stronger than RFC recommended {vxc**}.
728 expire_client(unconf); 749 * This has the advantage that there is at most
750 * one {*x***} in either list at any time.
751 */
752 expire_client(unconf);
729 } 753 }
730 if (!(new = create_client(clname))) 754 new = create_client(clname, dname);
755 if (new == NULL)
731 goto out; 756 goto out;
732 copy_verf(new,&conf->cl_verifier); 757 copy_verf(new,&conf->cl_verifier);
733 new->cl_addr = ip_addr; 758 new->cl_addr = ip_addr;
@@ -745,7 +770,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
745 * using input clverifier, clname, and callback info 770 * using input clverifier, clname, and callback info
746 * and generate a new cl_clientid and cl_confirm. 771 * and generate a new cl_clientid and cl_confirm.
747 */ 772 */
748 if (!(new = create_client(clname))) 773 new = create_client(clname, dname);
774 if (new == NULL)
749 goto out; 775 goto out;
750 copy_verf(new,&clverifier); 776 copy_verf(new,&clverifier);
751 new->cl_addr = ip_addr; 777 new->cl_addr = ip_addr;
@@ -771,7 +797,8 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
771 * new cl_verifier and a new cl_confirm 797 * new cl_verifier and a new cl_confirm
772 */ 798 */
773 expire_client(unconf); 799 expire_client(unconf);
774 if (!(new = create_client(clname))) 800 new = create_client(clname, dname);
801 if (new == NULL)
775 goto out; 802 goto out;
776 copy_verf(new,&clverifier); 803 copy_verf(new,&clverifier);
777 new->cl_addr = ip_addr; 804 new->cl_addr = ip_addr;
@@ -807,7 +834,7 @@ int
807nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) 834nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
808{ 835{
809 u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; 836 u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
810 struct nfs4_client *clp, *conf = NULL, *unconf = NULL; 837 struct nfs4_client *conf, *unconf;
811 nfs4_verifier confirm = setclientid_confirm->sc_confirm; 838 nfs4_verifier confirm = setclientid_confirm->sc_confirm;
812 clientid_t * clid = &setclientid_confirm->sc_clientid; 839 clientid_t * clid = &setclientid_confirm->sc_clientid;
813 int status; 840 int status;
@@ -820,102 +847,90 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confi
820 */ 847 */
821 848
822 nfs4_lock_state(); 849 nfs4_lock_state();
823 clp = find_confirmed_client(clid); 850
824 if (clp) { 851 conf = find_confirmed_client(clid);
825 status = nfserr_inval; 852 unconf = find_unconfirmed_client(clid);
826 /* 853
827 * Found a record for this clientid. If the IP addresses 854 status = nfserr_clid_inuse;
828 * don't match, return ERR_INVAL just as if the record had 855 if (conf && conf->cl_addr != ip_addr)
829 * not been found. 856 goto out;
830 */ 857 if (unconf && unconf->cl_addr != ip_addr)
831 if (clp->cl_addr != ip_addr) { 858 goto out;
832 printk("NFSD: setclientid: string in use by client" 859
833 "(clientid %08x/%08x)\n",
834 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
835 goto out;
836 }
837 conf = clp;
838 }
839 clp = find_unconfirmed_client(clid);
840 if (clp) {
841 status = nfserr_inval;
842 if (clp->cl_addr != ip_addr) {
843 printk("NFSD: setclientid: string in use by client"
844 "(clientid %08x/%08x)\n",
845 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
846 goto out;
847 }
848 unconf = clp;
849 }
850 /* CASE 1:
851 * unconf record that matches input clientid and input confirm.
852 * conf record that matches input clientid.
853 * conf and unconf records match names, verifiers
854 */
855 if ((conf && unconf) && 860 if ((conf && unconf) &&
856 (cmp_verf(&unconf->cl_confirm, &confirm)) && 861 (cmp_verf(&unconf->cl_confirm, &confirm)) &&
857 (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) && 862 (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
858 (cmp_name(&conf->cl_name,&unconf->cl_name)) && 863 (same_name(conf->cl_recdir,unconf->cl_recdir)) &&
859 (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) { 864 (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
865 /* CASE 1:
866 * unconf record that matches input clientid and input confirm.
867 * conf record that matches input clientid.
868 * conf and unconf records match names, verifiers
869 */
860 if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred)) 870 if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred))
861 status = nfserr_clid_inuse; 871 status = nfserr_clid_inuse;
862 else { 872 else {
863 expire_client(conf); 873 /* XXX: We just turn off callbacks until we can handle
864 clp = unconf; 874 * change request correctly. */
865 move_to_confirmed(unconf); 875 atomic_set(&conf->cl_callback.cb_set, 0);
876 gen_confirm(conf);
877 expire_client(unconf);
866 status = nfs_ok; 878 status = nfs_ok;
879
867 } 880 }
868 goto out; 881 } else if ((conf && !unconf) ||
869 }
870 /* CASE 2:
871 * conf record that matches input clientid.
872 * if unconf record that matches input clientid, then unconf->cl_name
873 * or unconf->cl_verifier don't match the conf record.
874 */
875 if ((conf && !unconf) ||
876 ((conf && unconf) && 882 ((conf && unconf) &&
877 (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) || 883 (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
878 !cmp_name(&conf->cl_name, &unconf->cl_name)))) { 884 !same_name(conf->cl_recdir, unconf->cl_recdir)))) {
879 if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) { 885 /* CASE 2:
886 * conf record that matches input clientid.
887 * if unconf record matches input clientid, then
888 * unconf->cl_name or unconf->cl_verifier don't match the
889 * conf record.
890 */
891 if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred))
880 status = nfserr_clid_inuse; 892 status = nfserr_clid_inuse;
881 } else { 893 else
882 clp = conf;
883 status = nfs_ok; 894 status = nfs_ok;
884 } 895 } else if (!conf && unconf
885 goto out; 896 && cmp_verf(&unconf->cl_confirm, &confirm)) {
886 } 897 /* CASE 3:
887 /* CASE 3: 898 * conf record not found.
888 * conf record not found. 899 * unconf record found.
889 * unconf record found. 900 * unconf->cl_confirm matches input confirm
890 * unconf->cl_confirm matches input confirm 901 */
891 */
892 if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
893 if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) { 902 if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
894 status = nfserr_clid_inuse; 903 status = nfserr_clid_inuse;
895 } else { 904 } else {
896 status = nfs_ok; 905 unsigned int hash =
897 clp = unconf; 906 clientstr_hashval(unconf->cl_recdir);
907 conf = find_confirmed_client_by_str(unconf->cl_recdir,
908 hash);
909 if (conf) {
910 nfsd4_remove_clid_dir(conf);
911 expire_client(conf);
912 }
898 move_to_confirmed(unconf); 913 move_to_confirmed(unconf);
914 conf = unconf;
915 status = nfs_ok;
899 } 916 }
900 goto out; 917 } else if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm)))
901 } 918 && (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm,
902 /* CASE 4: 919 &confirm)))) {
903 * conf record not found, or if conf, then conf->cl_confirm does not 920 /* CASE 4:
904 * match input confirm. 921 * conf record not found, or if conf, conf->cl_confirm does not
905 * unconf record not found, or if unconf, then unconf->cl_confirm 922 * match input confirm.
906 * does not match input confirm. 923 * unconf record not found, or if unconf, unconf->cl_confirm
907 */ 924 * does not match input confirm.
908 if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) && 925 */
909 (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) {
910 status = nfserr_stale_clientid; 926 status = nfserr_stale_clientid;
911 goto out; 927 } else {
928 /* check that we have hit one of the cases...*/
929 status = nfserr_clid_inuse;
912 } 930 }
913 /* check that we have hit one of the cases...*/
914 status = nfserr_inval;
915 goto out;
916out: 931out:
917 if (!status) 932 if (!status)
918 nfsd4_probe_callback(clp); 933 nfsd4_probe_callback(conf);
919 nfs4_unlock_state(); 934 nfs4_unlock_state();
920 return status; 935 return status;
921} 936}
@@ -961,60 +976,65 @@ alloc_init_file(struct inode *ino)
961 struct nfs4_file *fp; 976 struct nfs4_file *fp;
962 unsigned int hashval = file_hashval(ino); 977 unsigned int hashval = file_hashval(ino);
963 978
964 if ((fp = kmalloc(sizeof(struct nfs4_file),GFP_KERNEL))) { 979 fp = kmem_cache_alloc(file_slab, GFP_KERNEL);
980 if (fp) {
981 kref_init(&fp->fi_ref);
965 INIT_LIST_HEAD(&fp->fi_hash); 982 INIT_LIST_HEAD(&fp->fi_hash);
966 INIT_LIST_HEAD(&fp->fi_perfile); 983 INIT_LIST_HEAD(&fp->fi_stateids);
967 INIT_LIST_HEAD(&fp->fi_del_perfile); 984 INIT_LIST_HEAD(&fp->fi_delegations);
968 list_add(&fp->fi_hash, &file_hashtbl[hashval]); 985 list_add(&fp->fi_hash, &file_hashtbl[hashval]);
969 fp->fi_inode = igrab(ino); 986 fp->fi_inode = igrab(ino);
970 fp->fi_id = current_fileid++; 987 fp->fi_id = current_fileid++;
971 alloc_file++;
972 return fp; 988 return fp;
973 } 989 }
974 return NULL; 990 return NULL;
975} 991}
976 992
977static void 993static void
978release_all_files(void) 994nfsd4_free_slab(kmem_cache_t **slab)
979{ 995{
980 int i; 996 int status;
981 struct nfs4_file *fp;
982 997
983 for (i=0;i<FILE_HASH_SIZE;i++) { 998 if (*slab == NULL)
984 while (!list_empty(&file_hashtbl[i])) { 999 return;
985 fp = list_entry(file_hashtbl[i].next, struct nfs4_file, fi_hash); 1000 status = kmem_cache_destroy(*slab);
986 /* this should never be more than once... */ 1001 *slab = NULL;
987 if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) { 1002 WARN_ON(status);
988 printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
989 }
990 release_file(fp);
991 }
992 }
993} 1003}
994 1004
995kmem_cache_t *stateowner_slab = NULL; 1005static void
1006nfsd4_free_slabs(void)
1007{
1008 nfsd4_free_slab(&stateowner_slab);
1009 nfsd4_free_slab(&file_slab);
1010 nfsd4_free_slab(&stateid_slab);
1011 nfsd4_free_slab(&deleg_slab);
1012}
996 1013
997static int 1014static int
998nfsd4_init_slabs(void) 1015nfsd4_init_slabs(void)
999{ 1016{
1000 stateowner_slab = kmem_cache_create("nfsd4_stateowners", 1017 stateowner_slab = kmem_cache_create("nfsd4_stateowners",
1001 sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL); 1018 sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
1002 if (stateowner_slab == NULL) { 1019 if (stateowner_slab == NULL)
1003 dprintk("nfsd4: out of memory while initializing nfsv4\n"); 1020 goto out_nomem;
1004 return -ENOMEM; 1021 file_slab = kmem_cache_create("nfsd4_files",
1005 } 1022 sizeof(struct nfs4_file), 0, 0, NULL, NULL);
1023 if (file_slab == NULL)
1024 goto out_nomem;
1025 stateid_slab = kmem_cache_create("nfsd4_stateids",
1026 sizeof(struct nfs4_stateid), 0, 0, NULL, NULL);
1027 if (stateid_slab == NULL)
1028 goto out_nomem;
1029 deleg_slab = kmem_cache_create("nfsd4_delegations",
1030 sizeof(struct nfs4_delegation), 0, 0, NULL, NULL);
1031 if (deleg_slab == NULL)
1032 goto out_nomem;
1006 return 0; 1033 return 0;
1007} 1034out_nomem:
1008 1035 nfsd4_free_slabs();
1009static void 1036 dprintk("nfsd4: out of memory while initializing nfsv4\n");
1010nfsd4_free_slabs(void) 1037 return -ENOMEM;
1011{
1012 int status = 0;
1013
1014 if (stateowner_slab)
1015 status = kmem_cache_destroy(stateowner_slab);
1016 stateowner_slab = NULL;
1017 BUG_ON(status);
1018} 1038}
1019 1039
1020void 1040void
@@ -1055,14 +1075,13 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
1055 INIT_LIST_HEAD(&sop->so_idhash); 1075 INIT_LIST_HEAD(&sop->so_idhash);
1056 INIT_LIST_HEAD(&sop->so_strhash); 1076 INIT_LIST_HEAD(&sop->so_strhash);
1057 INIT_LIST_HEAD(&sop->so_perclient); 1077 INIT_LIST_HEAD(&sop->so_perclient);
1058 INIT_LIST_HEAD(&sop->so_perfilestate); 1078 INIT_LIST_HEAD(&sop->so_stateids);
1059 INIT_LIST_HEAD(&sop->so_perlockowner); /* not used */ 1079 INIT_LIST_HEAD(&sop->so_perstateid); /* not used */
1060 INIT_LIST_HEAD(&sop->so_close_lru); 1080 INIT_LIST_HEAD(&sop->so_close_lru);
1061 sop->so_time = 0; 1081 sop->so_time = 0;
1062 list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]); 1082 list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
1063 list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]); 1083 list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
1064 list_add(&sop->so_perclient, &clp->cl_perclient); 1084 list_add(&sop->so_perclient, &clp->cl_openowners);
1065 add_perclient++;
1066 sop->so_is_open_owner = 1; 1085 sop->so_is_open_owner = 1;
1067 sop->so_id = current_ownerid++; 1086 sop->so_id = current_ownerid++;
1068 sop->so_client = clp; 1087 sop->so_client = clp;
@@ -1080,10 +1099,10 @@ release_stateid_lockowners(struct nfs4_stateid *open_stp)
1080{ 1099{
1081 struct nfs4_stateowner *lock_sop; 1100 struct nfs4_stateowner *lock_sop;
1082 1101
1083 while (!list_empty(&open_stp->st_perlockowner)) { 1102 while (!list_empty(&open_stp->st_lockowners)) {
1084 lock_sop = list_entry(open_stp->st_perlockowner.next, 1103 lock_sop = list_entry(open_stp->st_lockowners.next,
1085 struct nfs4_stateowner, so_perlockowner); 1104 struct nfs4_stateowner, so_perstateid);
1086 /* list_del(&open_stp->st_perlockowner); */ 1105 /* list_del(&open_stp->st_lockowners); */
1087 BUG_ON(lock_sop->so_is_open_owner); 1106 BUG_ON(lock_sop->so_is_open_owner);
1088 release_stateowner(lock_sop); 1107 release_stateowner(lock_sop);
1089 } 1108 }
@@ -1096,14 +1115,12 @@ unhash_stateowner(struct nfs4_stateowner *sop)
1096 1115
1097 list_del(&sop->so_idhash); 1116 list_del(&sop->so_idhash);
1098 list_del(&sop->so_strhash); 1117 list_del(&sop->so_strhash);
1099 if (sop->so_is_open_owner) { 1118 if (sop->so_is_open_owner)
1100 list_del(&sop->so_perclient); 1119 list_del(&sop->so_perclient);
1101 del_perclient++; 1120 list_del(&sop->so_perstateid);
1102 } 1121 while (!list_empty(&sop->so_stateids)) {
1103 list_del(&sop->so_perlockowner); 1122 stp = list_entry(sop->so_stateids.next,
1104 while (!list_empty(&sop->so_perfilestate)) { 1123 struct nfs4_stateid, st_perstateowner);
1105 stp = list_entry(sop->so_perfilestate.next,
1106 struct nfs4_stateid, st_perfilestate);
1107 if (sop->so_is_open_owner) 1124 if (sop->so_is_open_owner)
1108 release_stateid(stp, OPEN_STATE); 1125 release_stateid(stp, OPEN_STATE);
1109 else 1126 else
@@ -1125,14 +1142,14 @@ init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *
1125 unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); 1142 unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
1126 1143
1127 INIT_LIST_HEAD(&stp->st_hash); 1144 INIT_LIST_HEAD(&stp->st_hash);
1128 INIT_LIST_HEAD(&stp->st_perfilestate); 1145 INIT_LIST_HEAD(&stp->st_perstateowner);
1129 INIT_LIST_HEAD(&stp->st_perlockowner); 1146 INIT_LIST_HEAD(&stp->st_lockowners);
1130 INIT_LIST_HEAD(&stp->st_perfile); 1147 INIT_LIST_HEAD(&stp->st_perfile);
1131 list_add(&stp->st_hash, &stateid_hashtbl[hashval]); 1148 list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
1132 list_add(&stp->st_perfilestate, &sop->so_perfilestate); 1149 list_add(&stp->st_perstateowner, &sop->so_stateids);
1133 list_add_perfile++; 1150 list_add(&stp->st_perfile, &fp->fi_stateids);
1134 list_add(&stp->st_perfile, &fp->fi_perfile);
1135 stp->st_stateowner = sop; 1151 stp->st_stateowner = sop;
1152 get_nfs4_file(fp);
1136 stp->st_file = fp; 1153 stp->st_file = fp;
1137 stp->st_stateid.si_boot = boot_time; 1154 stp->st_stateid.si_boot = boot_time;
1138 stp->st_stateid.si_stateownerid = sop->so_id; 1155 stp->st_stateid.si_stateownerid = sop->so_id;
@@ -1150,30 +1167,20 @@ release_stateid(struct nfs4_stateid *stp, int flags)
1150 struct file *filp = stp->st_vfs_file; 1167 struct file *filp = stp->st_vfs_file;
1151 1168
1152 list_del(&stp->st_hash); 1169 list_del(&stp->st_hash);
1153 list_del_perfile++;
1154 list_del(&stp->st_perfile); 1170 list_del(&stp->st_perfile);
1155 list_del(&stp->st_perfilestate); 1171 list_del(&stp->st_perstateowner);
1156 if (flags & OPEN_STATE) { 1172 if (flags & OPEN_STATE) {
1157 release_stateid_lockowners(stp); 1173 release_stateid_lockowners(stp);
1158 stp->st_vfs_file = NULL; 1174 stp->st_vfs_file = NULL;
1159 nfsd_close(filp); 1175 nfsd_close(filp);
1160 vfsclose++;
1161 } else if (flags & LOCK_STATE) 1176 } else if (flags & LOCK_STATE)
1162 locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); 1177 locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
1163 kfree(stp); 1178 put_nfs4_file(stp->st_file);
1179 kmem_cache_free(stateid_slab, stp);
1164 stp = NULL; 1180 stp = NULL;
1165} 1181}
1166 1182
1167static void 1183static void
1168release_file(struct nfs4_file *fp)
1169{
1170 free_file++;
1171 list_del(&fp->fi_hash);
1172 iput(fp->fi_inode);
1173 kfree(fp);
1174}
1175
1176void
1177move_to_close_lru(struct nfs4_stateowner *sop) 1184move_to_close_lru(struct nfs4_stateowner *sop)
1178{ 1185{
1179 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); 1186 dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
@@ -1183,11 +1190,10 @@ move_to_close_lru(struct nfs4_stateowner *sop)
1183 sop->so_time = get_seconds(); 1190 sop->so_time = get_seconds();
1184} 1191}
1185 1192
1186void 1193static void
1187release_state_owner(struct nfs4_stateid *stp, int flag) 1194release_state_owner(struct nfs4_stateid *stp, int flag)
1188{ 1195{
1189 struct nfs4_stateowner *sop = stp->st_stateowner; 1196 struct nfs4_stateowner *sop = stp->st_stateowner;
1190 struct nfs4_file *fp = stp->st_file;
1191 1197
1192 dprintk("NFSD: release_state_owner\n"); 1198 dprintk("NFSD: release_state_owner\n");
1193 release_stateid(stp, flag); 1199 release_stateid(stp, flag);
@@ -1196,12 +1202,8 @@ release_state_owner(struct nfs4_stateid *stp, int flag)
1196 * released by the laundromat service after the lease period 1202 * released by the laundromat service after the lease period
1197 * to enable us to handle CLOSE replay 1203 * to enable us to handle CLOSE replay
1198 */ 1204 */
1199 if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) 1205 if (sop->so_confirmed && list_empty(&sop->so_stateids))
1200 move_to_close_lru(sop); 1206 move_to_close_lru(sop);
1201 /* unused nfs4_file's are releseed. XXX slab cache? */
1202 if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
1203 release_file(fp);
1204 }
1205} 1207}
1206 1208
1207static int 1209static int
@@ -1231,8 +1233,10 @@ find_file(struct inode *ino)
1231 struct nfs4_file *fp; 1233 struct nfs4_file *fp;
1232 1234
1233 list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) { 1235 list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
1234 if (fp->fi_inode == ino) 1236 if (fp->fi_inode == ino) {
1237 get_nfs4_file(fp);
1235 return fp; 1238 return fp;
1239 }
1236 } 1240 }
1237 return NULL; 1241 return NULL;
1238} 1242}
@@ -1240,7 +1244,7 @@ find_file(struct inode *ino)
1240#define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0) 1244#define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0)
1241#define TEST_DENY(x) ((x >= 0 || x < 5)?1:0) 1245#define TEST_DENY(x) ((x >= 0 || x < 5)?1:0)
1242 1246
1243void 1247static void
1244set_access(unsigned int *access, unsigned long bmap) { 1248set_access(unsigned int *access, unsigned long bmap) {
1245 int i; 1249 int i;
1246 1250
@@ -1251,7 +1255,7 @@ set_access(unsigned int *access, unsigned long bmap) {
1251 } 1255 }
1252} 1256}
1253 1257
1254void 1258static void
1255set_deny(unsigned int *deny, unsigned long bmap) { 1259set_deny(unsigned int *deny, unsigned long bmap) {
1256 int i; 1260 int i;
1257 1261
@@ -1277,25 +1281,30 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
1277 * Called to check deny when READ with all zero stateid or 1281 * Called to check deny when READ with all zero stateid or
1278 * WRITE with all zero or all one stateid 1282 * WRITE with all zero or all one stateid
1279 */ 1283 */
1280int 1284static int
1281nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) 1285nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
1282{ 1286{
1283 struct inode *ino = current_fh->fh_dentry->d_inode; 1287 struct inode *ino = current_fh->fh_dentry->d_inode;
1284 struct nfs4_file *fp; 1288 struct nfs4_file *fp;
1285 struct nfs4_stateid *stp; 1289 struct nfs4_stateid *stp;
1290 int ret;
1286 1291
1287 dprintk("NFSD: nfs4_share_conflict\n"); 1292 dprintk("NFSD: nfs4_share_conflict\n");
1288 1293
1289 fp = find_file(ino); 1294 fp = find_file(ino);
1290 if (fp) { 1295 if (!fp)
1296 return nfs_ok;
1297 ret = nfserr_share_denied;
1291 /* Search for conflicting share reservations */ 1298 /* Search for conflicting share reservations */
1292 list_for_each_entry(stp, &fp->fi_perfile, st_perfile) { 1299 list_for_each_entry(stp, &fp->fi_stateids, st_perfile) {
1293 if (test_bit(deny_type, &stp->st_deny_bmap) || 1300 if (test_bit(deny_type, &stp->st_deny_bmap) ||
1294 test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) 1301 test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
1295 return nfserr_share_denied; 1302 goto out;
1296 }
1297 } 1303 }
1298 return nfs_ok; 1304 ret = nfs_ok;
1305out:
1306 put_nfs4_file(fp);
1307 return ret;
1299} 1308}
1300 1309
1301static inline void 1310static inline void
@@ -1427,7 +1436,7 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
1427 return -EAGAIN; 1436 return -EAGAIN;
1428} 1437}
1429 1438
1430struct lock_manager_operations nfsd_lease_mng_ops = { 1439static struct lock_manager_operations nfsd_lease_mng_ops = {
1431 .fl_break = nfsd_break_deleg_cb, 1440 .fl_break = nfsd_break_deleg_cb,
1432 .fl_release_private = nfsd_release_deleg_cb, 1441 .fl_release_private = nfsd_release_deleg_cb,
1433 .fl_copy_lock = nfsd_copy_lock_deleg_cb, 1442 .fl_copy_lock = nfsd_copy_lock_deleg_cb,
@@ -1526,6 +1535,51 @@ out:
1526 return status; 1535 return status;
1527} 1536}
1528 1537
1538static inline int
1539nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
1540{
1541 if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
1542 return nfserr_openmode;
1543 else
1544 return nfs_ok;
1545}
1546
1547static struct nfs4_delegation *
1548find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
1549{
1550 struct nfs4_delegation *dp;
1551
1552 list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
1553 if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
1554 return dp;
1555 }
1556 return NULL;
1557}
1558
1559static int
1560nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open,
1561 struct nfs4_delegation **dp)
1562{
1563 int flags;
1564 int status = nfserr_bad_stateid;
1565
1566 *dp = find_delegation_file(fp, &open->op_delegate_stateid);
1567 if (*dp == NULL)
1568 goto out;
1569 flags = open->op_share_access == NFS4_SHARE_ACCESS_READ ?
1570 RD_STATE : WR_STATE;
1571 status = nfs4_check_delegmode(*dp, flags);
1572 if (status)
1573 *dp = NULL;
1574out:
1575 if (open->op_claim_type != NFS4_OPEN_CLAIM_DELEGATE_CUR)
1576 return nfs_ok;
1577 if (status)
1578 return status;
1579 open->op_stateowner->so_confirmed = 1;
1580 return nfs_ok;
1581}
1582
1529static int 1583static int
1530nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp) 1584nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
1531{ 1585{
@@ -1533,7 +1587,7 @@ nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_state
1533 int status = nfserr_share_denied; 1587 int status = nfserr_share_denied;
1534 struct nfs4_stateowner *sop = open->op_stateowner; 1588 struct nfs4_stateowner *sop = open->op_stateowner;
1535 1589
1536 list_for_each_entry(local, &fp->fi_perfile, st_perfile) { 1590 list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
1537 /* ignore lock owners */ 1591 /* ignore lock owners */
1538 if (local->st_stateowner->so_is_open_owner == 0) 1592 if (local->st_stateowner->so_is_open_owner == 0)
1539 continue; 1593 continue;
@@ -1549,25 +1603,37 @@ out:
1549 return status; 1603 return status;
1550} 1604}
1551 1605
1606static inline struct nfs4_stateid *
1607nfs4_alloc_stateid(void)
1608{
1609 return kmem_cache_alloc(stateid_slab, GFP_KERNEL);
1610}
1611
1552static int 1612static int
1553nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, 1613nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
1614 struct nfs4_delegation *dp,
1554 struct svc_fh *cur_fh, int flags) 1615 struct svc_fh *cur_fh, int flags)
1555{ 1616{
1556 struct nfs4_stateid *stp; 1617 struct nfs4_stateid *stp;
1557 int status;
1558 1618
1559 stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL); 1619 stp = nfs4_alloc_stateid();
1560 if (stp == NULL) 1620 if (stp == NULL)
1561 return nfserr_resource; 1621 return nfserr_resource;
1562 1622
1563 status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &stp->st_vfs_file); 1623 if (dp) {
1564 if (status) { 1624 get_file(dp->dl_vfs_file);
1565 if (status == nfserr_dropit) 1625 stp->st_vfs_file = dp->dl_vfs_file;
1566 status = nfserr_jukebox; 1626 } else {
1567 kfree(stp); 1627 int status;
1568 return status; 1628 status = nfsd_open(rqstp, cur_fh, S_IFREG, flags,
1629 &stp->st_vfs_file);
1630 if (status) {
1631 if (status == nfserr_dropit)
1632 status = nfserr_jukebox;
1633 kmem_cache_free(stateid_slab, stp);
1634 return status;
1635 }
1569 } 1636 }
1570 vfsopen++;
1571 *stpp = stp; 1637 *stpp = stp;
1572 return 0; 1638 return 0;
1573} 1639}
@@ -1628,6 +1694,7 @@ nfs4_set_claim_prev(struct nfsd4_open *open, int *status)
1628 *status = nfserr_reclaim_bad; 1694 *status = nfserr_reclaim_bad;
1629 else { 1695 else {
1630 open->op_stateowner->so_confirmed = 1; 1696 open->op_stateowner->so_confirmed = 1;
1697 open->op_stateowner->so_client->cl_firststate = 1;
1631 open->op_stateowner->so_seqid--; 1698 open->op_stateowner->so_seqid--;
1632 } 1699 }
1633 } 1700 }
@@ -1646,14 +1713,30 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
1646 int status, flag = 0; 1713 int status, flag = 0;
1647 1714
1648 flag = NFS4_OPEN_DELEGATE_NONE; 1715 flag = NFS4_OPEN_DELEGATE_NONE;
1649 if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL 1716 open->op_recall = 0;
1650 || !atomic_read(&cb->cb_set) || !sop->so_confirmed) 1717 switch (open->op_claim_type) {
1651 goto out; 1718 case NFS4_OPEN_CLAIM_PREVIOUS:
1652 1719 if (!atomic_read(&cb->cb_set))
1653 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 1720 open->op_recall = 1;
1654 flag = NFS4_OPEN_DELEGATE_WRITE; 1721 flag = open->op_delegate_type;
1655 else 1722 if (flag == NFS4_OPEN_DELEGATE_NONE)
1656 flag = NFS4_OPEN_DELEGATE_READ; 1723 goto out;
1724 break;
1725 case NFS4_OPEN_CLAIM_NULL:
1726 /* Let's not give out any delegations till everyone's
1727 * had the chance to reclaim theirs.... */
1728 if (nfs4_in_grace())
1729 goto out;
1730 if (!atomic_read(&cb->cb_set) || !sop->so_confirmed)
1731 goto out;
1732 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
1733 flag = NFS4_OPEN_DELEGATE_WRITE;
1734 else
1735 flag = NFS4_OPEN_DELEGATE_READ;
1736 break;
1737 default:
1738 goto out;
1739 }
1657 1740
1658 dp = alloc_init_deleg(sop->so_client, stp, fh, flag); 1741 dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
1659 if (dp == NULL) { 1742 if (dp == NULL) {
@@ -1687,6 +1770,10 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
1687 dp->dl_stateid.si_fileid, 1770 dp->dl_stateid.si_fileid,
1688 dp->dl_stateid.si_generation); 1771 dp->dl_stateid.si_generation);
1689out: 1772out:
1773 if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
1774 && flag == NFS4_OPEN_DELEGATE_NONE
1775 && open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
1776 printk("NFSD: WARNING: refusing delegation reclaim\n");
1690 open->op_delegate_type = flag; 1777 open->op_delegate_type = flag;
1691} 1778}
1692 1779
@@ -1699,6 +1786,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1699 struct nfs4_file *fp = NULL; 1786 struct nfs4_file *fp = NULL;
1700 struct inode *ino = current_fh->fh_dentry->d_inode; 1787 struct inode *ino = current_fh->fh_dentry->d_inode;
1701 struct nfs4_stateid *stp = NULL; 1788 struct nfs4_stateid *stp = NULL;
1789 struct nfs4_delegation *dp = NULL;
1702 int status; 1790 int status;
1703 1791
1704 status = nfserr_inval; 1792 status = nfserr_inval;
@@ -1713,7 +1801,13 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1713 if (fp) { 1801 if (fp) {
1714 if ((status = nfs4_check_open(fp, open, &stp))) 1802 if ((status = nfs4_check_open(fp, open, &stp)))
1715 goto out; 1803 goto out;
1804 status = nfs4_check_deleg(fp, open, &dp);
1805 if (status)
1806 goto out;
1716 } else { 1807 } else {
1808 status = nfserr_bad_stateid;
1809 if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR)
1810 goto out;
1717 status = nfserr_resource; 1811 status = nfserr_resource;
1718 fp = alloc_init_file(ino); 1812 fp = alloc_init_file(ino);
1719 if (fp == NULL) 1813 if (fp == NULL)
@@ -1736,7 +1830,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1736 flags = MAY_WRITE; 1830 flags = MAY_WRITE;
1737 else 1831 else
1738 flags = MAY_READ; 1832 flags = MAY_READ;
1739 if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags))) 1833 status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
1834 if (status)
1740 goto out; 1835 goto out;
1741 init_stateid(stp, fp, open); 1836 init_stateid(stp, fp, open);
1742 status = nfsd4_truncate(rqstp, current_fh, open); 1837 status = nfsd4_truncate(rqstp, current_fh, open);
@@ -1759,10 +1854,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1759 stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid, 1854 stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
1760 stp->st_stateid.si_fileid, stp->st_stateid.si_generation); 1855 stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
1761out: 1856out:
1762 /* take the opportunity to clean up unused state */ 1857 if (fp)
1763 if (fp && list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) 1858 put_nfs4_file(fp);
1764 release_file(fp);
1765
1766 /* CLAIM_PREVIOUS has different error returns */ 1859 /* CLAIM_PREVIOUS has different error returns */
1767 nfs4_set_claim_prev(open, &status); 1860 nfs4_set_claim_prev(open, &status);
1768 /* 1861 /*
@@ -1775,6 +1868,7 @@ out:
1775 return status; 1868 return status;
1776} 1869}
1777 1870
1871static struct workqueue_struct *laundry_wq;
1778static struct work_struct laundromat_work; 1872static struct work_struct laundromat_work;
1779static void laundromat_main(void *); 1873static void laundromat_main(void *);
1780static DECLARE_WORK(laundromat_work, laundromat_main, NULL); 1874static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
@@ -1800,7 +1894,7 @@ nfsd4_renew(clientid_t *clid)
1800 } 1894 }
1801 renew_client(clp); 1895 renew_client(clp);
1802 status = nfserr_cb_path_down; 1896 status = nfserr_cb_path_down;
1803 if (!list_empty(&clp->cl_del_perclnt) 1897 if (!list_empty(&clp->cl_delegations)
1804 && !atomic_read(&clp->cl_callback.cb_set)) 1898 && !atomic_read(&clp->cl_callback.cb_set))
1805 goto out; 1899 goto out;
1806 status = nfs_ok; 1900 status = nfs_ok;
@@ -1809,7 +1903,15 @@ out:
1809 return status; 1903 return status;
1810} 1904}
1811 1905
1812time_t 1906static void
1907end_grace(void)
1908{
1909 dprintk("NFSD: end of grace period\n");
1910 nfsd4_recdir_purge_old();
1911 in_grace = 0;
1912}
1913
1914static time_t
1813nfs4_laundromat(void) 1915nfs4_laundromat(void)
1814{ 1916{
1815 struct nfs4_client *clp; 1917 struct nfs4_client *clp;
@@ -1823,6 +1925,8 @@ nfs4_laundromat(void)
1823 nfs4_lock_state(); 1925 nfs4_lock_state();
1824 1926
1825 dprintk("NFSD: laundromat service - starting\n"); 1927 dprintk("NFSD: laundromat service - starting\n");
1928 if (in_grace)
1929 end_grace();
1826 list_for_each_safe(pos, next, &client_lru) { 1930 list_for_each_safe(pos, next, &client_lru) {
1827 clp = list_entry(pos, struct nfs4_client, cl_lru); 1931 clp = list_entry(pos, struct nfs4_client, cl_lru);
1828 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { 1932 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
@@ -1833,6 +1937,7 @@ nfs4_laundromat(void)
1833 } 1937 }
1834 dprintk("NFSD: purging unused client (clientid %08x)\n", 1938 dprintk("NFSD: purging unused client (clientid %08x)\n",
1835 clp->cl_clientid.cl_id); 1939 clp->cl_clientid.cl_id);
1940 nfsd4_remove_clid_dir(clp);
1836 expire_client(clp); 1941 expire_client(clp);
1837 } 1942 }
1838 INIT_LIST_HEAD(&reaplist); 1943 INIT_LIST_HEAD(&reaplist);
@@ -1882,13 +1987,13 @@ laundromat_main(void *not_used)
1882 1987
1883 t = nfs4_laundromat(); 1988 t = nfs4_laundromat();
1884 dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t); 1989 dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
1885 schedule_delayed_work(&laundromat_work, t*HZ); 1990 queue_delayed_work(laundry_wq, &laundromat_work, t*HZ);
1886} 1991}
1887 1992
1888/* search ownerid_hashtbl[] and close_lru for stateid owner 1993/* search ownerid_hashtbl[] and close_lru for stateid owner
1889 * (stateid->si_stateownerid) 1994 * (stateid->si_stateownerid)
1890 */ 1995 */
1891struct nfs4_stateowner * 1996static struct nfs4_stateowner *
1892find_openstateowner_id(u32 st_id, int flags) { 1997find_openstateowner_id(u32 st_id, int flags) {
1893 struct nfs4_stateowner *local = NULL; 1998 struct nfs4_stateowner *local = NULL;
1894 1999
@@ -1949,15 +2054,6 @@ out:
1949} 2054}
1950 2055
1951static inline int 2056static inline int
1952nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
1953{
1954 if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
1955 return nfserr_openmode;
1956 else
1957 return nfs_ok;
1958}
1959
1960static inline int
1961check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) 2057check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
1962{ 2058{
1963 /* Trying to call delegreturn with a special stateid? Yuch: */ 2059 /* Trying to call delegreturn with a special stateid? Yuch: */
@@ -2071,7 +2167,7 @@ out:
2071/* 2167/*
2072 * Checks for sequence id mutating operations. 2168 * Checks for sequence id mutating operations.
2073 */ 2169 */
2074int 2170static int
2075nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid) 2171nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid)
2076{ 2172{
2077 int status; 2173 int status;
@@ -2230,6 +2326,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
2230 stp->st_stateid.si_stateownerid, 2326 stp->st_stateid.si_stateownerid,
2231 stp->st_stateid.si_fileid, 2327 stp->st_stateid.si_fileid,
2232 stp->st_stateid.si_generation); 2328 stp->st_stateid.si_generation);
2329
2330 nfsd4_create_clid_dir(sop->so_client);
2233out: 2331out:
2234 if (oc->oc_stateowner) 2332 if (oc->oc_stateowner)
2235 nfs4_get_stateowner(oc->oc_stateowner); 2333 nfs4_get_stateowner(oc->oc_stateowner);
@@ -2387,7 +2485,7 @@ static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
2387static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; 2485static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
2388static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE]; 2486static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
2389 2487
2390struct nfs4_stateid * 2488static struct nfs4_stateid *
2391find_stateid(stateid_t *stid, int flags) 2489find_stateid(stateid_t *stid, int flags)
2392{ 2490{
2393 struct nfs4_stateid *local = NULL; 2491 struct nfs4_stateid *local = NULL;
@@ -2419,25 +2517,19 @@ find_stateid(stateid_t *stid, int flags)
2419static struct nfs4_delegation * 2517static struct nfs4_delegation *
2420find_delegation_stateid(struct inode *ino, stateid_t *stid) 2518find_delegation_stateid(struct inode *ino, stateid_t *stid)
2421{ 2519{
2422 struct nfs4_delegation *dp = NULL; 2520 struct nfs4_file *fp;
2423 struct nfs4_file *fp = NULL; 2521 struct nfs4_delegation *dl;
2424 u32 st_id;
2425 2522
2426 dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n", 2523 dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
2427 stid->si_boot, stid->si_stateownerid, 2524 stid->si_boot, stid->si_stateownerid,
2428 stid->si_fileid, stid->si_generation); 2525 stid->si_fileid, stid->si_generation);
2429 2526
2430 st_id = stid->si_stateownerid;
2431 fp = find_file(ino); 2527 fp = find_file(ino);
2432 if (fp) { 2528 if (!fp)
2433 list_for_each_entry(dp, &fp->fi_del_perfile, dl_del_perfile) { 2529 return NULL;
2434 if(dp->dl_stateid.si_stateownerid == st_id) { 2530 dl = find_delegation_file(fp, stid);
2435 dprintk("NFSD: find_delegation dp %p\n",dp); 2531 put_nfs4_file(fp);
2436 return dp; 2532 return dl;
2437 }
2438 }
2439 }
2440 return NULL;
2441} 2533}
2442 2534
2443/* 2535/*
@@ -2457,7 +2549,7 @@ nfs4_transform_lock_offset(struct file_lock *lock)
2457 lock->fl_end = OFFSET_MAX; 2549 lock->fl_end = OFFSET_MAX;
2458} 2550}
2459 2551
2460int 2552static int
2461nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval) 2553nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval)
2462{ 2554{
2463 struct nfs4_stateowner *local = NULL; 2555 struct nfs4_stateowner *local = NULL;
@@ -2498,22 +2590,6 @@ nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
2498} 2590}
2499 2591
2500static struct nfs4_stateowner * 2592static struct nfs4_stateowner *
2501find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid)
2502{
2503 struct nfs4_stateowner *local = NULL;
2504 int i;
2505
2506 for (i = 0; i < LOCK_HASH_SIZE; i++) {
2507 list_for_each_entry(local, &lock_ownerid_hashtbl[i], so_idhash) {
2508 if (!cmp_owner_str(local, owner, clid))
2509 continue;
2510 return local;
2511 }
2512 }
2513 return NULL;
2514}
2515
2516static struct nfs4_stateowner *
2517find_lockstateowner_str(struct inode *inode, clientid_t *clid, 2593find_lockstateowner_str(struct inode *inode, clientid_t *clid,
2518 struct xdr_netobj *owner) 2594 struct xdr_netobj *owner)
2519{ 2595{
@@ -2548,13 +2624,13 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
2548 INIT_LIST_HEAD(&sop->so_idhash); 2624 INIT_LIST_HEAD(&sop->so_idhash);
2549 INIT_LIST_HEAD(&sop->so_strhash); 2625 INIT_LIST_HEAD(&sop->so_strhash);
2550 INIT_LIST_HEAD(&sop->so_perclient); 2626 INIT_LIST_HEAD(&sop->so_perclient);
2551 INIT_LIST_HEAD(&sop->so_perfilestate); 2627 INIT_LIST_HEAD(&sop->so_stateids);
2552 INIT_LIST_HEAD(&sop->so_perlockowner); 2628 INIT_LIST_HEAD(&sop->so_perstateid);
2553 INIT_LIST_HEAD(&sop->so_close_lru); /* not used */ 2629 INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
2554 sop->so_time = 0; 2630 sop->so_time = 0;
2555 list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]); 2631 list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
2556 list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]); 2632 list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
2557 list_add(&sop->so_perlockowner, &open_stp->st_perlockowner); 2633 list_add(&sop->so_perstateid, &open_stp->st_lockowners);
2558 sop->so_is_open_owner = 0; 2634 sop->so_is_open_owner = 0;
2559 sop->so_id = current_ownerid++; 2635 sop->so_id = current_ownerid++;
2560 sop->so_client = clp; 2636 sop->so_client = clp;
@@ -2567,24 +2643,24 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, str
2567 return sop; 2643 return sop;
2568} 2644}
2569 2645
2570struct nfs4_stateid * 2646static struct nfs4_stateid *
2571alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp) 2647alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
2572{ 2648{
2573 struct nfs4_stateid *stp; 2649 struct nfs4_stateid *stp;
2574 unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id); 2650 unsigned int hashval = stateid_hashval(sop->so_id, fp->fi_id);
2575 2651
2576 if ((stp = kmalloc(sizeof(struct nfs4_stateid), 2652 stp = nfs4_alloc_stateid();
2577 GFP_KERNEL)) == NULL) 2653 if (stp == NULL)
2578 goto out; 2654 goto out;
2579 INIT_LIST_HEAD(&stp->st_hash); 2655 INIT_LIST_HEAD(&stp->st_hash);
2580 INIT_LIST_HEAD(&stp->st_perfile); 2656 INIT_LIST_HEAD(&stp->st_perfile);
2581 INIT_LIST_HEAD(&stp->st_perfilestate); 2657 INIT_LIST_HEAD(&stp->st_perstateowner);
2582 INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */ 2658 INIT_LIST_HEAD(&stp->st_lockowners); /* not used */
2583 list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]); 2659 list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
2584 list_add(&stp->st_perfile, &fp->fi_perfile); 2660 list_add(&stp->st_perfile, &fp->fi_stateids);
2585 list_add_perfile++; 2661 list_add(&stp->st_perstateowner, &sop->so_stateids);
2586 list_add(&stp->st_perfilestate, &sop->so_perfilestate);
2587 stp->st_stateowner = sop; 2662 stp->st_stateowner = sop;
2663 get_nfs4_file(fp);
2588 stp->st_file = fp; 2664 stp->st_file = fp;
2589 stp->st_stateid.si_boot = boot_time; 2665 stp->st_stateid.si_boot = boot_time;
2590 stp->st_stateid.si_stateownerid = sop->so_id; 2666 stp->st_stateid.si_stateownerid = sop->so_id;
@@ -2598,7 +2674,7 @@ out:
2598 return stp; 2674 return stp;
2599} 2675}
2600 2676
2601int 2677static int
2602check_lock_length(u64 offset, u64 length) 2678check_lock_length(u64 offset, u64 length)
2603{ 2679{
2604 return ((length == 0) || ((length != ~(u64)0) && 2680 return ((length == 0) || ((length != ~(u64)0) &&
@@ -2611,7 +2687,7 @@ check_lock_length(u64 offset, u64 length)
2611int 2687int
2612nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock) 2688nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
2613{ 2689{
2614 struct nfs4_stateowner *lock_sop = NULL, *open_sop = NULL; 2690 struct nfs4_stateowner *open_sop = NULL;
2615 struct nfs4_stateid *lock_stp; 2691 struct nfs4_stateid *lock_stp;
2616 struct file *filp; 2692 struct file *filp;
2617 struct file_lock file_lock; 2693 struct file_lock file_lock;
@@ -2670,16 +2746,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock
2670 strhashval = lock_ownerstr_hashval(fp->fi_inode, 2746 strhashval = lock_ownerstr_hashval(fp->fi_inode,
2671 open_sop->so_client->cl_clientid.cl_id, 2747 open_sop->so_client->cl_clientid.cl_id,
2672 &lock->v.new.owner); 2748 &lock->v.new.owner);
2673 /* 2749 /* XXX: Do we need to check for duplicate stateowners on
2674 * If we already have this lock owner, the client is in 2750 * the same file, or should they just be allowed (and
2675 * error (or our bookeeping is wrong!) 2751 * create new stateids)? */
2676 * for asking for a 'new lock'.
2677 */
2678 status = nfserr_bad_stateid;
2679 lock_sop = find_lockstateowner(&lock->v.new.owner,
2680 &lock->v.new.clientid);
2681 if (lock_sop)
2682 goto out;
2683 status = nfserr_resource; 2752 status = nfserr_resource;
2684 if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock))) 2753 if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
2685 goto out; 2754 goto out;
@@ -2970,8 +3039,11 @@ int
2970nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner) 3039nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
2971{ 3040{
2972 clientid_t *clid = &rlockowner->rl_clientid; 3041 clientid_t *clid = &rlockowner->rl_clientid;
2973 struct nfs4_stateowner *local = NULL; 3042 struct nfs4_stateowner *sop;
3043 struct nfs4_stateid *stp;
2974 struct xdr_netobj *owner = &rlockowner->rl_owner; 3044 struct xdr_netobj *owner = &rlockowner->rl_owner;
3045 struct list_head matches;
3046 int i;
2975 int status; 3047 int status;
2976 3048
2977 dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", 3049 dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -2987,22 +3059,32 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *
2987 3059
2988 nfs4_lock_state(); 3060 nfs4_lock_state();
2989 3061
2990 status = nfs_ok; 3062 status = nfserr_locks_held;
2991 local = find_lockstateowner(owner, clid); 3063 /* XXX: we're doing a linear search through all the lockowners.
2992 if (local) { 3064 * Yipes! For now we'll just hope clients aren't really using
2993 struct nfs4_stateid *stp; 3065 * release_lockowner much, but eventually we have to fix these
2994 3066 * data structures. */
2995 /* check for any locks held by any stateid 3067 INIT_LIST_HEAD(&matches);
2996 * associated with the (lock) stateowner */ 3068 for (i = 0; i < LOCK_HASH_SIZE; i++) {
2997 status = nfserr_locks_held; 3069 list_for_each_entry(sop, &lock_ownerid_hashtbl[i], so_idhash) {
2998 list_for_each_entry(stp, &local->so_perfilestate, 3070 if (!cmp_owner_str(sop, owner, clid))
2999 st_perfilestate) { 3071 continue;
3000 if (check_for_locks(stp->st_vfs_file, local)) 3072 list_for_each_entry(stp, &sop->so_stateids,
3001 goto out; 3073 st_perstateowner) {
3074 if (check_for_locks(stp->st_vfs_file, sop))
3075 goto out;
3076 /* Note: so_perclient unused for lockowners,
3077 * so it's OK to fool with here. */
3078 list_add(&sop->so_perclient, &matches);
3079 }
3002 } 3080 }
3003 /* no locks held by (lock) stateowner */ 3081 }
3004 status = nfs_ok; 3082 /* Clients probably won't expect us to return with some (but not all)
3005 release_stateowner(local); 3083 * of the lockowner state released; so don't release any until all
3084 * have been checked. */
3085 status = nfs_ok;
3086 list_for_each_entry(sop, &matches, so_perclient) {
3087 release_stateowner(sop);
3006 } 3088 }
3007out: 3089out:
3008 nfs4_unlock_state(); 3090 nfs4_unlock_state();
@@ -3010,39 +3092,38 @@ out:
3010} 3092}
3011 3093
3012static inline struct nfs4_client_reclaim * 3094static inline struct nfs4_client_reclaim *
3013alloc_reclaim(int namelen) 3095alloc_reclaim(void)
3014{ 3096{
3015 struct nfs4_client_reclaim *crp = NULL; 3097 return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
3098}
3016 3099
3017 crp = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL); 3100int
3018 if (!crp) 3101nfs4_has_reclaimed_state(const char *name)
3019 return NULL; 3102{
3020 crp->cr_name.data = kmalloc(namelen, GFP_KERNEL); 3103 unsigned int strhashval = clientstr_hashval(name);
3021 if (!crp->cr_name.data) { 3104 struct nfs4_client *clp;
3022 kfree(crp); 3105
3023 return NULL; 3106 clp = find_confirmed_client_by_str(name, strhashval);
3024 } 3107 return clp ? 1 : 0;
3025 return crp;
3026} 3108}
3027 3109
3028/* 3110/*
3029 * failure => all reset bets are off, nfserr_no_grace... 3111 * failure => all reset bets are off, nfserr_no_grace...
3030 */ 3112 */
3031static int 3113int
3032nfs4_client_to_reclaim(char *name, int namlen) 3114nfs4_client_to_reclaim(const char *name)
3033{ 3115{
3034 unsigned int strhashval; 3116 unsigned int strhashval;
3035 struct nfs4_client_reclaim *crp = NULL; 3117 struct nfs4_client_reclaim *crp = NULL;
3036 3118
3037 dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name); 3119 dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
3038 crp = alloc_reclaim(namlen); 3120 crp = alloc_reclaim();
3039 if (!crp) 3121 if (!crp)
3040 return 0; 3122 return 0;
3041 strhashval = clientstr_hashval(name, namlen); 3123 strhashval = clientstr_hashval(name);
3042 INIT_LIST_HEAD(&crp->cr_strhash); 3124 INIT_LIST_HEAD(&crp->cr_strhash);
3043 list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]); 3125 list_add(&crp->cr_strhash, &reclaim_str_hashtbl[strhashval]);
3044 memcpy(crp->cr_name.data, name, namlen); 3126 memcpy(crp->cr_recdir, name, HEXDIR_LEN);
3045 crp->cr_name.len = namlen;
3046 reclaim_str_hashtbl_size++; 3127 reclaim_str_hashtbl_size++;
3047 return 1; 3128 return 1;
3048} 3129}
@@ -3053,13 +3134,11 @@ nfs4_release_reclaim(void)
3053 struct nfs4_client_reclaim *crp = NULL; 3134 struct nfs4_client_reclaim *crp = NULL;
3054 int i; 3135 int i;
3055 3136
3056 BUG_ON(!nfs4_reclaim_init);
3057 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 3137 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3058 while (!list_empty(&reclaim_str_hashtbl[i])) { 3138 while (!list_empty(&reclaim_str_hashtbl[i])) {
3059 crp = list_entry(reclaim_str_hashtbl[i].next, 3139 crp = list_entry(reclaim_str_hashtbl[i].next,
3060 struct nfs4_client_reclaim, cr_strhash); 3140 struct nfs4_client_reclaim, cr_strhash);
3061 list_del(&crp->cr_strhash); 3141 list_del(&crp->cr_strhash);
3062 kfree(crp->cr_name.data);
3063 kfree(crp); 3142 kfree(crp);
3064 reclaim_str_hashtbl_size--; 3143 reclaim_str_hashtbl_size--;
3065 } 3144 }
@@ -3069,7 +3148,7 @@ nfs4_release_reclaim(void)
3069 3148
3070/* 3149/*
3071 * called from OPEN, CLAIM_PREVIOUS with a new clientid. */ 3150 * called from OPEN, CLAIM_PREVIOUS with a new clientid. */
3072struct nfs4_client_reclaim * 3151static struct nfs4_client_reclaim *
3073nfs4_find_reclaim_client(clientid_t *clid) 3152nfs4_find_reclaim_client(clientid_t *clid)
3074{ 3153{
3075 unsigned int strhashval; 3154 unsigned int strhashval;
@@ -3082,13 +3161,14 @@ nfs4_find_reclaim_client(clientid_t *clid)
3082 if (clp == NULL) 3161 if (clp == NULL)
3083 return NULL; 3162 return NULL;
3084 3163
3085 dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n", 3164 dprintk("NFSD: nfs4_find_reclaim_client for %.*s with recdir %s\n",
3086 clp->cl_name.len, clp->cl_name.data); 3165 clp->cl_name.len, clp->cl_name.data,
3166 clp->cl_recdir);
3087 3167
3088 /* find clp->cl_name in reclaim_str_hashtbl */ 3168 /* find clp->cl_name in reclaim_str_hashtbl */
3089 strhashval = clientstr_hashval(clp->cl_name.data, clp->cl_name.len); 3169 strhashval = clientstr_hashval(clp->cl_recdir);
3090 list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) { 3170 list_for_each_entry(crp, &reclaim_str_hashtbl[strhashval], cr_strhash) {
3091 if (cmp_name(&crp->cr_name, &clp->cl_name)) { 3171 if (same_name(crp->cr_recdir, clp->cl_recdir)) {
3092 return crp; 3172 return crp;
3093 } 3173 }
3094 } 3174 }
@@ -3101,30 +3181,16 @@ nfs4_find_reclaim_client(clientid_t *clid)
3101int 3181int
3102nfs4_check_open_reclaim(clientid_t *clid) 3182nfs4_check_open_reclaim(clientid_t *clid)
3103{ 3183{
3104 struct nfs4_client_reclaim *crp; 3184 return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
3105
3106 if ((crp = nfs4_find_reclaim_client(clid)) == NULL)
3107 return nfserr_reclaim_bad;
3108 return nfs_ok;
3109} 3185}
3110 3186
3187/* initialization to perform at module load time: */
3111 3188
3112/* 3189void
3113 * Start and stop routines 3190nfs4_state_init(void)
3114 */
3115
3116static void
3117__nfs4_state_init(void)
3118{ 3191{
3119 int i; 3192 int i;
3120 time_t grace_time;
3121 3193
3122 if (!nfs4_reclaim_init) {
3123 for (i = 0; i < CLIENT_HASH_SIZE; i++)
3124 INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
3125 reclaim_str_hashtbl_size = 0;
3126 nfs4_reclaim_init = 1;
3127 }
3128 for (i = 0; i < CLIENT_HASH_SIZE; i++) { 3194 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3129 INIT_LIST_HEAD(&conf_id_hashtbl[i]); 3195 INIT_LIST_HEAD(&conf_id_hashtbl[i]);
3130 INIT_LIST_HEAD(&conf_str_hashtbl[i]); 3196 INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -3146,26 +3212,46 @@ __nfs4_state_init(void)
3146 INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]); 3212 INIT_LIST_HEAD(&lock_ownerid_hashtbl[i]);
3147 INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]); 3213 INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
3148 } 3214 }
3149 memset(&zerostateid, 0, sizeof(stateid_t));
3150 memset(&onestateid, ~0, sizeof(stateid_t)); 3215 memset(&onestateid, ~0, sizeof(stateid_t));
3151
3152 INIT_LIST_HEAD(&close_lru); 3216 INIT_LIST_HEAD(&close_lru);
3153 INIT_LIST_HEAD(&client_lru); 3217 INIT_LIST_HEAD(&client_lru);
3154 INIT_LIST_HEAD(&del_recall_lru); 3218 INIT_LIST_HEAD(&del_recall_lru);
3155 spin_lock_init(&recall_lock); 3219 for (i = 0; i < CLIENT_HASH_SIZE; i++)
3220 INIT_LIST_HEAD(&reclaim_str_hashtbl[i]);
3221 reclaim_str_hashtbl_size = 0;
3222}
3223
3224static void
3225nfsd4_load_reboot_recovery_data(void)
3226{
3227 int status;
3228
3229 nfs4_lock_state();
3230 nfsd4_init_recdir(user_recovery_dirname);
3231 status = nfsd4_recdir_load();
3232 nfs4_unlock_state();
3233 if (status)
3234 printk("NFSD: Failure reading reboot recovery data\n");
3235}
3236
3237/* initialization to perform when the nfsd service is started: */
3238
3239static void
3240__nfs4_state_start(void)
3241{
3242 time_t grace_time;
3243
3156 boot_time = get_seconds(); 3244 boot_time = get_seconds();
3157 grace_time = max(old_lease_time, lease_time); 3245 grace_time = max(user_lease_time, lease_time);
3158 if (reclaim_str_hashtbl_size == 0) 3246 lease_time = user_lease_time;
3159 grace_time = 0; 3247 in_grace = 1;
3160 if (grace_time) 3248 printk("NFSD: starting %ld-second grace period\n", grace_time);
3161 printk("NFSD: starting %ld-second grace period\n", grace_time); 3249 laundry_wq = create_singlethread_workqueue("nfsd4");
3162 grace_end = boot_time + grace_time; 3250 queue_delayed_work(laundry_wq, &laundromat_work, grace_time*HZ);
3163 INIT_WORK(&laundromat_work,laundromat_main, NULL);
3164 schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ);
3165} 3251}
3166 3252
3167int 3253int
3168nfs4_state_init(void) 3254nfs4_state_start(void)
3169{ 3255{
3170 int status; 3256 int status;
3171 3257
@@ -3174,7 +3260,8 @@ nfs4_state_init(void)
3174 status = nfsd4_init_slabs(); 3260 status = nfsd4_init_slabs();
3175 if (status) 3261 if (status)
3176 return status; 3262 return status;
3177 __nfs4_state_init(); 3263 nfsd4_load_reboot_recovery_data();
3264 __nfs4_state_start();
3178 nfs4_init = 1; 3265 nfs4_init = 1;
3179 return 0; 3266 return 0;
3180} 3267}
@@ -3182,14 +3269,7 @@ nfs4_state_init(void)
3182int 3269int
3183nfs4_in_grace(void) 3270nfs4_in_grace(void)
3184{ 3271{
3185 return get_seconds() < grace_end; 3272 return in_grace;
3186}
3187
3188void
3189set_no_grace(void)
3190{
3191 printk("NFSD: ERROR in reboot recovery. State reclaims will fail.\n");
3192 grace_end = get_seconds();
3193} 3273}
3194 3274
3195time_t 3275time_t
@@ -3236,21 +3316,11 @@ __nfs4_state_shutdown(void)
3236 unhash_delegation(dp); 3316 unhash_delegation(dp);
3237 } 3317 }
3238 3318
3239 release_all_files();
3240 cancel_delayed_work(&laundromat_work); 3319 cancel_delayed_work(&laundromat_work);
3241 flush_scheduled_work(); 3320 flush_workqueue(laundry_wq);
3321 destroy_workqueue(laundry_wq);
3322 nfsd4_shutdown_recdir();
3242 nfs4_init = 0; 3323 nfs4_init = 0;
3243 dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
3244 list_add_perfile, list_del_perfile);
3245 dprintk("NFSD: add_perclient %d del_perclient %d\n",
3246 add_perclient, del_perclient);
3247 dprintk("NFSD: alloc_file %d free_file %d\n",
3248 alloc_file, free_file);
3249 dprintk("NFSD: vfsopen %d vfsclose %d\n",
3250 vfsopen, vfsclose);
3251 dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
3252 alloc_delegation, free_delegation);
3253
3254} 3324}
3255 3325
3256void 3326void
@@ -3263,56 +3333,48 @@ nfs4_state_shutdown(void)
3263 nfs4_unlock_state(); 3333 nfs4_unlock_state();
3264} 3334}
3265 3335
3336static void
3337nfs4_set_recdir(char *recdir)
3338{
3339 nfs4_lock_state();
3340 strcpy(user_recovery_dirname, recdir);
3341 nfs4_unlock_state();
3342}
3343
3344/*
3345 * Change the NFSv4 recovery directory to recdir.
3346 */
3347int
3348nfs4_reset_recoverydir(char *recdir)
3349{
3350 int status;
3351 struct nameidata nd;
3352
3353 status = path_lookup(recdir, LOOKUP_FOLLOW, &nd);
3354 if (status)
3355 return status;
3356 status = -ENOTDIR;
3357 if (S_ISDIR(nd.dentry->d_inode->i_mode)) {
3358 nfs4_set_recdir(recdir);
3359 status = 0;
3360 }
3361 path_release(&nd);
3362 return status;
3363}
3364
3266/* 3365/*
3267 * Called when leasetime is changed. 3366 * Called when leasetime is changed.
3268 * 3367 *
3269 * if nfsd is not started, simply set the global lease. 3368 * The only way the protocol gives us to handle on-the-fly lease changes is to
3270 * 3369 * simulate a reboot. Instead of doing that, we just wait till the next time
3271 * if nfsd(s) are running, lease change requires nfsv4 state to be reset. 3370 * we start to register any changes in lease time. If the administrator
3272 * e.g: boot_time is reset, existing nfs4_client structs are 3371 * really wants to change the lease time *now*, they can go ahead and bring
3273 * used to fill reclaim_str_hashtbl, then all state (except for the 3372 * nfsd down and then back up again after changing the lease time.
3274 * reclaim_str_hashtbl) is re-initialized.
3275 *
3276 * if the old lease time is greater than the new lease time, the grace
3277 * period needs to be set to the old lease time to allow clients to reclaim
3278 * their state. XXX - we may want to set the grace period == lease time
3279 * after an initial grace period == old lease time
3280 *
3281 * if an error occurs in this process, the new lease is set, but the server
3282 * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace
3283 * which means OPEN/LOCK/READ/WRITE will fail during grace period.
3284 *
3285 * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and
3286 * OPEN and LOCK reclaims.
3287 */ 3373 */
3288void 3374void
3289nfs4_reset_lease(time_t leasetime) 3375nfs4_reset_lease(time_t leasetime)
3290{ 3376{
3291 struct nfs4_client *clp; 3377 lock_kernel();
3292 int i; 3378 user_lease_time = leasetime;
3293 3379 unlock_kernel();
3294 printk("NFSD: New leasetime %ld\n",leasetime);
3295 if (!nfs4_init)
3296 return;
3297 nfs4_lock_state();
3298 old_lease_time = lease_time;
3299 lease_time = leasetime;
3300
3301 nfs4_release_reclaim();
3302
3303 /* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
3304 for (i = 0; i < CLIENT_HASH_SIZE; i++) {
3305 list_for_each_entry(clp, &conf_id_hashtbl[i], cl_idhash) {
3306 if (!nfs4_client_to_reclaim(clp->cl_name.data,
3307 clp->cl_name.len)) {
3308 nfs4_release_reclaim();
3309 goto init_state;
3310 }
3311 }
3312 }
3313init_state:
3314 __nfs4_state_shutdown();
3315 __nfs4_state_init();
3316 nfs4_unlock_state();
3317} 3380}
3318
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 36a058a112d5..91fb171d2ace 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -136,7 +136,7 @@ xdr_error: \
136 } \ 136 } \
137} while (0) 137} while (0)
138 138
139u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) 139static u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
140{ 140{
141 /* We want more bytes than seem to be available. 141 /* We want more bytes than seem to be available.
142 * Maybe we need a new page, maybe we have just run out 142 * Maybe we need a new page, maybe we have just run out
@@ -190,7 +190,7 @@ defer_free(struct nfsd4_compoundargs *argp,
190 return 0; 190 return 0;
191} 191}
192 192
193char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) 193static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
194{ 194{
195 void *new = NULL; 195 void *new = NULL;
196 if (p == argp->tmp) { 196 if (p == argp->tmp) {
@@ -1366,7 +1366,10 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1366 if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) { 1366 if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
1367 if ((buflen -= 4) < 0) 1367 if ((buflen -= 4) < 0)
1368 goto out_resource; 1368 goto out_resource;
1369 WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME ); 1369 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
1370 WRITE32(NFS4_FH_VOLATILE_ANY);
1371 else
1372 WRITE32(NFS4_FH_VOLATILE_ANY|NFS4_FH_VOL_RENAME);
1370 } 1373 }
1371 if (bmval0 & FATTR4_WORD0_CHANGE) { 1374 if (bmval0 & FATTR4_WORD0_CHANGE) {
1372 /* 1375 /*
@@ -1969,7 +1972,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open
1969 case NFS4_OPEN_DELEGATE_READ: 1972 case NFS4_OPEN_DELEGATE_READ:
1970 RESERVE_SPACE(20 + sizeof(stateid_t)); 1973 RESERVE_SPACE(20 + sizeof(stateid_t));
1971 WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); 1974 WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
1972 WRITE32(0); 1975 WRITE32(open->op_recall);
1973 1976
1974 /* 1977 /*
1975 * TODO: ACE's in delegations 1978 * TODO: ACE's in delegations
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 161afdcb8f7d..841c562991e8 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -51,6 +51,7 @@ enum {
51 NFSD_Fh, 51 NFSD_Fh,
52 NFSD_Threads, 52 NFSD_Threads,
53 NFSD_Leasetime, 53 NFSD_Leasetime,
54 NFSD_RecoveryDir,
54}; 55};
55 56
56/* 57/*
@@ -66,6 +67,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size);
66static ssize_t write_filehandle(struct file *file, char *buf, size_t size); 67static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
67static ssize_t write_threads(struct file *file, char *buf, size_t size); 68static ssize_t write_threads(struct file *file, char *buf, size_t size);
68static ssize_t write_leasetime(struct file *file, char *buf, size_t size); 69static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
70static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
69 71
70static ssize_t (*write_op[])(struct file *, char *, size_t) = { 72static ssize_t (*write_op[])(struct file *, char *, size_t) = {
71 [NFSD_Svc] = write_svc, 73 [NFSD_Svc] = write_svc,
@@ -78,6 +80,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
78 [NFSD_Fh] = write_filehandle, 80 [NFSD_Fh] = write_filehandle,
79 [NFSD_Threads] = write_threads, 81 [NFSD_Threads] = write_threads,
80 [NFSD_Leasetime] = write_leasetime, 82 [NFSD_Leasetime] = write_leasetime,
83 [NFSD_RecoveryDir] = write_recoverydir,
81}; 84};
82 85
83static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos) 86static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
@@ -349,6 +352,25 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
349 return strlen(buf); 352 return strlen(buf);
350} 353}
351 354
355static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
356{
357 char *mesg = buf;
358 char *recdir;
359 int len, status;
360
361 if (size > PATH_MAX || buf[size-1] != '\n')
362 return -EINVAL;
363 buf[size-1] = 0;
364
365 recdir = mesg;
366 len = qword_get(&mesg, recdir, size);
367 if (len <= 0)
368 return -EINVAL;
369
370 status = nfs4_reset_recoverydir(recdir);
371 return strlen(buf);
372}
373
352/*----------------------------------------------------------------------------*/ 374/*----------------------------------------------------------------------------*/
353/* 375/*
354 * populating the filesystem. 376 * populating the filesystem.
@@ -369,6 +391,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
369 [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, 391 [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
370#ifdef CONFIG_NFSD_V4 392#ifdef CONFIG_NFSD_V4
371 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, 393 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
394 [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
372#endif 395#endif
373 /* last one */ {""} 396 /* last one */ {""}
374 }; 397 };
@@ -397,9 +420,8 @@ static int __init init_nfsd(void)
397 nfsd_cache_init(); /* RPC reply cache */ 420 nfsd_cache_init(); /* RPC reply cache */
398 nfsd_export_init(); /* Exports table */ 421 nfsd_export_init(); /* Exports table */
399 nfsd_lockd_init(); /* lockd->nfsd callbacks */ 422 nfsd_lockd_init(); /* lockd->nfsd callbacks */
400#ifdef CONFIG_NFSD_V4 423 nfs4_state_init(); /* NFSv4 locking state */
401 nfsd_idmap_init(); /* Name to ID mapping */ 424 nfsd_idmap_init(); /* Name to ID mapping */
402#endif /* CONFIG_NFSD_V4 */
403 if (proc_mkdir("fs/nfs", NULL)) { 425 if (proc_mkdir("fs/nfs", NULL)) {
404 struct proc_dir_entry *entry; 426 struct proc_dir_entry *entry;
405 entry = create_proc_entry("fs/nfs/exports", 0, NULL); 427 entry = create_proc_entry("fs/nfs/exports", 0, NULL);
@@ -426,9 +448,7 @@ static void __exit exit_nfsd(void)
426 remove_proc_entry("fs/nfs", NULL); 448 remove_proc_entry("fs/nfs", NULL);
427 nfsd_stat_shutdown(); 449 nfsd_stat_shutdown();
428 nfsd_lockd_shutdown(); 450 nfsd_lockd_shutdown();
429#ifdef CONFIG_NFSD_V4
430 nfsd_idmap_shutdown(); 451 nfsd_idmap_shutdown();
431#endif /* CONFIG_NFSD_V4 */
432 unregister_filesystem(&nfsd_fs_type); 452 unregister_filesystem(&nfsd_fs_type);
433} 453}
434 454
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 904df604e86b..07b9a065e9da 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -95,7 +95,7 @@ nfsd_svc(unsigned short port, int nrservs)
95 error = nfsd_racache_init(2*nrservs); 95 error = nfsd_racache_init(2*nrservs);
96 if (error<0) 96 if (error<0)
97 goto out; 97 goto out;
98 error = nfs4_state_init(); 98 error = nfs4_state_start();
99 if (error<0) 99 if (error<0)
100 goto out; 100 goto out;
101 if (!nfsd_serv) { 101 if (!nfsd_serv) {
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ae3940dc85cc..de340ffd33c3 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -50,7 +50,6 @@
50#include <linux/posix_acl.h> 50#include <linux/posix_acl.h>
51#ifdef CONFIG_NFSD_V4 51#ifdef CONFIG_NFSD_V4
52#include <linux/posix_acl_xattr.h> 52#include <linux/posix_acl_xattr.h>
53#include <linux/xattr_acl.h>
54#include <linux/xattr.h> 53#include <linux/xattr.h>
55#include <linux/nfs4.h> 54#include <linux/nfs4.h>
56#include <linux/nfs4_acl.h> 55#include <linux/nfs4_acl.h>
@@ -425,13 +424,13 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
425 goto out_nfserr; 424 goto out_nfserr;
426 425
427 if (pacl) { 426 if (pacl) {
428 error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS); 427 error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS);
429 if (error < 0) 428 if (error < 0)
430 goto out_nfserr; 429 goto out_nfserr;
431 } 430 }
432 431
433 if (dpacl) { 432 if (dpacl) {
434 error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT); 433 error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT);
435 if (error < 0) 434 if (error < 0)
436 goto out_nfserr; 435 goto out_nfserr;
437 } 436 }
@@ -498,7 +497,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
498 struct posix_acl *pacl = NULL, *dpacl = NULL; 497 struct posix_acl *pacl = NULL, *dpacl = NULL;
499 unsigned int flags = 0; 498 unsigned int flags = 0;
500 499
501 pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS); 500 pacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_ACCESS);
502 if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) 501 if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
503 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); 502 pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
504 if (IS_ERR(pacl)) { 503 if (IS_ERR(pacl)) {
@@ -508,7 +507,7 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
508 } 507 }
509 508
510 if (S_ISDIR(inode->i_mode)) { 509 if (S_ISDIR(inode->i_mode)) {
511 dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT); 510 dpacl = _get_posix_acl(dentry, POSIX_ACL_XATTR_DEFAULT);
512 if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) 511 if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
513 dpacl = NULL; 512 dpacl = NULL;
514 else if (IS_ERR(dpacl)) { 513 else if (IS_ERR(dpacl)) {
diff --git a/fs/open.c b/fs/open.c
index 963bd81a44c8..3f4a4286fdc4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -21,6 +21,7 @@
21#include <linux/vfs.h> 21#include <linux/vfs.h>
22#include <asm/uaccess.h> 22#include <asm/uaccess.h>
23#include <linux/fs.h> 23#include <linux/fs.h>
24#include <linux/personality.h>
24#include <linux/pagemap.h> 25#include <linux/pagemap.h>
25#include <linux/syscalls.h> 26#include <linux/syscalls.h>
26 27
@@ -807,7 +808,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
807 808
808 /* NB: we're sure to have correct a_ops only after f_op->open */ 809 /* NB: we're sure to have correct a_ops only after f_op->open */
809 if (f->f_flags & O_DIRECT) { 810 if (f->f_flags & O_DIRECT) {
810 if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) { 811 if (!f->f_mapping->a_ops ||
812 ((!f->f_mapping->a_ops->direct_IO) &&
813 (!f->f_mapping->a_ops->get_xip_page))) {
811 fput(f); 814 fput(f);
812 f = ERR_PTR(-EINVAL); 815 f = ERR_PTR(-EINVAL);
813 } 816 }
@@ -933,31 +936,27 @@ EXPORT_SYMBOL(fd_install);
933asmlinkage long sys_open(const char __user * filename, int flags, int mode) 936asmlinkage long sys_open(const char __user * filename, int flags, int mode)
934{ 937{
935 char * tmp; 938 char * tmp;
936 int fd, error; 939 int fd;
940
941 if (force_o_largefile())
942 flags |= O_LARGEFILE;
937 943
938#if BITS_PER_LONG != 32
939 flags |= O_LARGEFILE;
940#endif
941 tmp = getname(filename); 944 tmp = getname(filename);
942 fd = PTR_ERR(tmp); 945 fd = PTR_ERR(tmp);
943 if (!IS_ERR(tmp)) { 946 if (!IS_ERR(tmp)) {
944 fd = get_unused_fd(); 947 fd = get_unused_fd();
945 if (fd >= 0) { 948 if (fd >= 0) {
946 struct file *f = filp_open(tmp, flags, mode); 949 struct file *f = filp_open(tmp, flags, mode);
947 error = PTR_ERR(f); 950 if (IS_ERR(f)) {
948 if (IS_ERR(f)) 951 put_unused_fd(fd);
949 goto out_error; 952 fd = PTR_ERR(f);
950 fd_install(fd, f); 953 } else {
954 fd_install(fd, f);
955 }
951 } 956 }
952out:
953 putname(tmp); 957 putname(tmp);
954 } 958 }
955 return fd; 959 return fd;
956
957out_error:
958 put_unused_fd(fd);
959 fd = error;
960 goto out;
961} 960}
962EXPORT_SYMBOL_GPL(sys_open); 961EXPORT_SYMBOL_GPL(sys_open);
963 962
@@ -980,23 +979,15 @@ asmlinkage long sys_creat(const char __user * pathname, int mode)
980 */ 979 */
981int filp_close(struct file *filp, fl_owner_t id) 980int filp_close(struct file *filp, fl_owner_t id)
982{ 981{
983 int retval; 982 int retval = 0;
984
985 /* Report and clear outstanding errors */
986 retval = filp->f_error;
987 if (retval)
988 filp->f_error = 0;
989 983
990 if (!file_count(filp)) { 984 if (!file_count(filp)) {
991 printk(KERN_ERR "VFS: Close: file count is 0\n"); 985 printk(KERN_ERR "VFS: Close: file count is 0\n");
992 return retval; 986 return 0;
993 } 987 }
994 988
995 if (filp->f_op && filp->f_op->flush) { 989 if (filp->f_op && filp->f_op->flush)
996 int err = filp->f_op->flush(filp); 990 retval = filp->f_op->flush(filp);
997 if (!retval)
998 retval = err;
999 }
1000 991
1001 dnotify_flush(filp, id); 992 dnotify_flush(filp, id);
1002 locks_remove_posix(filp, id); 993 locks_remove_posix(filp, id);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e31903aadd96..ace151fa4878 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -314,7 +314,7 @@ static int may_ptrace_attach(struct task_struct *task)
314 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) 314 (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
315 goto out; 315 goto out;
316 rmb(); 316 rmb();
317 if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) 317 if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE))
318 goto out; 318 goto out;
319 if (security_ptrace(current, task)) 319 if (security_ptrace(current, task))
320 goto out; 320 goto out;
@@ -1113,7 +1113,9 @@ static int task_dumpable(struct task_struct *task)
1113 if (mm) 1113 if (mm)
1114 dumpable = mm->dumpable; 1114 dumpable = mm->dumpable;
1115 task_unlock(task); 1115 task_unlock(task);
1116 return dumpable; 1116 if(dumpable == 1)
1117 return 1;
1118 return 0;
1117} 1119}
1118 1120
1119 1121
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 63a9fbf1ac51..94b570ad037d 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -451,7 +451,7 @@ static int devices_read_proc(char *page, char **start, off_t off,
451 int count, int *eof, void *data) 451 int count, int *eof, void *data)
452{ 452{
453 int len = get_chrdev_list(page); 453 int len = get_chrdev_list(page);
454 len += get_blkdev_list(page+len); 454 len += get_blkdev_list(page+len, len);
455 return proc_calc_metrics(page, start, off, count, eof, len); 455 return proc_calc_metrics(page, start, off, count, eof, len);
456} 456}
457 457
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index cd66147cca04..7a8f5595c26f 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -61,7 +61,7 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
61 ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; 61 ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1;
62 else { 62 else {
63 le = (struct qnx4_link_info*)de; 63 le = (struct qnx4_link_info*)de;
64 ino = ( le->dl_inode_blk - 1 ) * 64 ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) *
65 QNX4_INODES_PER_BLOCK + 65 QNX4_INODES_PER_BLOCK +
66 le->dl_inode_ndx; 66 le->dl_inode_ndx;
67 } 67 }
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index aa92d6b76a9a..b79162a35478 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -236,7 +236,7 @@ unsigned long qnx4_block_map( struct inode *inode, long iblock )
236 struct buffer_head *bh = NULL; 236 struct buffer_head *bh = NULL;
237 struct qnx4_xblk *xblk = NULL; 237 struct qnx4_xblk *xblk = NULL;
238 struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode); 238 struct qnx4_inode_entry *qnx4_inode = qnx4_raw_inode(inode);
239 qnx4_nxtnt_t nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts); 239 u16 nxtnt = le16_to_cpu(qnx4_inode->di_num_xtnts);
240 240
241 if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) { 241 if ( iblock < le32_to_cpu(qnx4_inode->di_first_xtnt.xtnt_size) ) {
242 // iblock is in the first extent. This is easy. 242 // iblock is in the first extent. This is easy.
@@ -372,7 +372,7 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent)
372 printk("qnx4: unable to read the superblock\n"); 372 printk("qnx4: unable to read the superblock\n");
373 goto outnobh; 373 goto outnobh;
374 } 374 }
375 if ( le32_to_cpu( *(__u32*)bh->b_data ) != QNX4_SUPER_MAGIC ) { 375 if ( le32_to_cpup((__le32*) bh->b_data) != QNX4_SUPER_MAGIC ) {
376 if (!silent) 376 if (!silent)
377 printk("qnx4: wrong fsid in superblock.\n"); 377 printk("qnx4: wrong fsid in superblock.\n");
378 goto out; 378 goto out;
diff --git a/fs/quota.c b/fs/quota.c
index 3f0333a51a23..f5d1cff55196 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -149,36 +149,6 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd, qid_t
149 return error; 149 return error;
150} 150}
151 151
152static struct super_block *get_super_to_sync(int type)
153{
154 struct list_head *head;
155 int cnt, dirty;
156
157restart:
158 spin_lock(&sb_lock);
159 list_for_each(head, &super_blocks) {
160 struct super_block *sb = list_entry(head, struct super_block, s_list);
161
162 /* This test just improves performance so it needn't be reliable... */
163 for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
164 if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
165 && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
166 dirty = 1;
167 if (!dirty)
168 continue;
169 sb->s_count++;
170 spin_unlock(&sb_lock);
171 down_read(&sb->s_umount);
172 if (!sb->s_root) {
173 drop_super(sb);
174 goto restart;
175 }
176 return sb;
177 }
178 spin_unlock(&sb_lock);
179 return NULL;
180}
181
182static void quota_sync_sb(struct super_block *sb, int type) 152static void quota_sync_sb(struct super_block *sb, int type)
183{ 153{
184 int cnt; 154 int cnt;
@@ -219,17 +189,35 @@ static void quota_sync_sb(struct super_block *sb, int type)
219 189
220void sync_dquots(struct super_block *sb, int type) 190void sync_dquots(struct super_block *sb, int type)
221{ 191{
192 int cnt, dirty;
193
222 if (sb) { 194 if (sb) {
223 if (sb->s_qcop->quota_sync) 195 if (sb->s_qcop->quota_sync)
224 quota_sync_sb(sb, type); 196 quota_sync_sb(sb, type);
197 return;
225 } 198 }
226 else { 199
227 while ((sb = get_super_to_sync(type)) != NULL) { 200 spin_lock(&sb_lock);
228 if (sb->s_qcop->quota_sync) 201restart:
229 quota_sync_sb(sb, type); 202 list_for_each_entry(sb, &super_blocks, s_list) {
230 drop_super(sb); 203 /* This test just improves performance so it needn't be reliable... */
231 } 204 for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++)
205 if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt)
206 && info_any_dirty(&sb_dqopt(sb)->info[cnt]))
207 dirty = 1;
208 if (!dirty)
209 continue;
210 sb->s_count++;
211 spin_unlock(&sb_lock);
212 down_read(&sb->s_umount);
213 if (sb->s_root && sb->s_qcop->quota_sync)
214 quota_sync_sb(sb, type);
215 up_read(&sb->s_umount);
216 spin_lock(&sb_lock);
217 if (__put_super_and_need_restart(sb))
218 goto restart;
232 } 219 }
220 spin_unlock(&sb_lock);
233} 221}
234 222
235/* Copy parameters and call proper function */ 223/* Copy parameters and call proper function */
diff --git a/fs/read_write.c b/fs/read_write.c
index c4c2bee373ed..9292f5fa4d62 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -203,6 +203,16 @@ Einval:
203 return -EINVAL; 203 return -EINVAL;
204} 204}
205 205
206static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
207{
208 set_current_state(TASK_UNINTERRUPTIBLE);
209 if (!kiocbIsKicked(iocb))
210 schedule();
211 else
212 kiocbClearKicked(iocb);
213 __set_current_state(TASK_RUNNING);
214}
215
206ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 216ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
207{ 217{
208 struct kiocb kiocb; 218 struct kiocb kiocb;
@@ -210,7 +220,10 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
210 220
211 init_sync_kiocb(&kiocb, filp); 221 init_sync_kiocb(&kiocb, filp);
212 kiocb.ki_pos = *ppos; 222 kiocb.ki_pos = *ppos;
213 ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos); 223 while (-EIOCBRETRY ==
224 (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos)))
225 wait_on_retry_sync_kiocb(&kiocb);
226
214 if (-EIOCBQUEUED == ret) 227 if (-EIOCBQUEUED == ret)
215 ret = wait_on_sync_kiocb(&kiocb); 228 ret = wait_on_sync_kiocb(&kiocb);
216 *ppos = kiocb.ki_pos; 229 *ppos = kiocb.ki_pos;
@@ -258,7 +271,10 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
258 271
259 init_sync_kiocb(&kiocb, filp); 272 init_sync_kiocb(&kiocb, filp);
260 kiocb.ki_pos = *ppos; 273 kiocb.ki_pos = *ppos;
261 ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos); 274 while (-EIOCBRETRY ==
275 (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos)))
276 wait_on_retry_sync_kiocb(&kiocb);
277
262 if (-EIOCBQUEUED == ret) 278 if (-EIOCBQUEUED == ret)
263 ret = wait_on_sync_kiocb(&kiocb); 279 ret = wait_on_sync_kiocb(&kiocb);
264 *ppos = kiocb.ki_pos; 280 *ppos = kiocb.ki_pos;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 2230afff1870..12e91209544e 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -201,7 +201,7 @@ static int reiserfs_allocate_blocks_for_region(
201 /* If we came here, it means we absolutely need to open a transaction, 201 /* If we came here, it means we absolutely need to open a transaction,
202 since we need to allocate some blocks */ 202 since we need to allocate some blocks */
203 reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that. 203 reiserfs_write_lock(inode->i_sb); // Journaling stuff and we need that.
204 res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); // Wish I know if this number enough 204 res = journal_begin(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb)); // Wish I know if this number enough
205 if (res) 205 if (res)
206 goto error_exit; 206 goto error_exit;
207 reiserfs_update_inode_transaction(inode) ; 207 reiserfs_update_inode_transaction(inode) ;
@@ -576,7 +576,7 @@ error_exit:
576 int err; 576 int err;
577 // update any changes we made to blk count 577 // update any changes we made to blk count
578 reiserfs_update_sd(th, inode); 578 reiserfs_update_sd(th, inode);
579 err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS); 579 err = journal_end(th, inode->i_sb, JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb));
580 if (err) 580 if (err)
581 res = err; 581 res = err;
582 } 582 }
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 2711dff1b7b4..0d5817f81972 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -28,7 +28,7 @@ static int reiserfs_prepare_write(struct file *f, struct page *page,
28void reiserfs_delete_inode (struct inode * inode) 28void reiserfs_delete_inode (struct inode * inode)
29{ 29{
30 /* We need blocks for transaction + (user+group) quota update (possibly delete) */ 30 /* We need blocks for transaction + (user+group) quota update (possibly delete) */
31 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS; 31 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
32 struct reiserfs_transaction_handle th ; 32 struct reiserfs_transaction_handle th ;
33 33
34 reiserfs_write_lock(inode->i_sb); 34 reiserfs_write_lock(inode->i_sb);
@@ -591,7 +591,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
591 XXX in practically impossible worst case direct2indirect() 591 XXX in practically impossible worst case direct2indirect()
592 can incur (much) more than 3 balancings. 592 can incur (much) more than 3 balancings.
593 quota update for user, group */ 593 quota update for user, group */
594 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS; 594 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 1 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(inode->i_sb);
595 int version; 595 int version;
596 int dangle = 1; 596 int dangle = 1;
597 loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ; 597 loff_t new_offset = (((loff_t)block) << inode->i_sb->s_blocksize_bits) + 1 ;
@@ -2796,12 +2796,15 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
2796 2796
2797 if (!error) { 2797 if (!error) {
2798 struct reiserfs_transaction_handle th; 2798 struct reiserfs_transaction_handle th;
2799 int jbegin_count = 2*(REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb)+REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb))+2;
2799 2800
2800 /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ 2801 /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */
2801 journal_begin(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2); 2802 error = journal_begin(&th, inode->i_sb, jbegin_count);
2803 if (error)
2804 goto out;
2802 error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; 2805 error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
2803 if (error) { 2806 if (error) {
2804 journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2); 2807 journal_end(&th, inode->i_sb, jbegin_count);
2805 goto out; 2808 goto out;
2806 } 2809 }
2807 /* Update corresponding info in inode so that everything is in 2810 /* Update corresponding info in inode so that everything is in
@@ -2811,7 +2814,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) {
2811 if (attr->ia_valid & ATTR_GID) 2814 if (attr->ia_valid & ATTR_GID)
2812 inode->i_gid = attr->ia_gid; 2815 inode->i_gid = attr->ia_gid;
2813 mark_inode_dirty(inode); 2816 mark_inode_dirty(inode);
2814 journal_end(&th, inode->i_sb, 4*REISERFS_QUOTA_INIT_BLOCKS+2); 2817 error = journal_end(&th, inode->i_sb, jbegin_count);
2815 } 2818 }
2816 } 2819 }
2817 if (!error) 2820 if (!error)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 3072cfdee959..7b87707acc36 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2631,6 +2631,8 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct sup
2631 int retval; 2631 int retval;
2632 2632
2633 reiserfs_check_lock_depth(p_s_sb, "journal_begin") ; 2633 reiserfs_check_lock_depth(p_s_sb, "journal_begin") ;
2634 if (nblocks > journal->j_trans_max)
2635 BUG();
2634 2636
2635 PROC_INFO_INC( p_s_sb, journal.journal_being ); 2637 PROC_INFO_INC( p_s_sb, journal.journal_being );
2636 /* set here for journal_join */ 2638 /* set here for journal_join */
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 7d4dc5f5aa8b..4a333255f27a 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -586,7 +586,7 @@ static int reiserfs_create (struct inode * dir, struct dentry *dentry, int mode,
586 int retval; 586 int retval;
587 struct inode * inode; 587 struct inode * inode;
588 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 588 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
589 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); 589 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
590 struct reiserfs_transaction_handle th ; 590 struct reiserfs_transaction_handle th ;
591 int locked; 591 int locked;
592 592
@@ -653,7 +653,7 @@ static int reiserfs_mknod (struct inode * dir, struct dentry *dentry, int mode,
653 struct inode * inode; 653 struct inode * inode;
654 struct reiserfs_transaction_handle th ; 654 struct reiserfs_transaction_handle th ;
655 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 655 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
656 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); 656 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
657 int locked; 657 int locked;
658 658
659 if (!new_valid_dev(rdev)) 659 if (!new_valid_dev(rdev))
@@ -727,7 +727,7 @@ static int reiserfs_mkdir (struct inode * dir, struct dentry *dentry, int mode)
727 struct inode * inode; 727 struct inode * inode;
728 struct reiserfs_transaction_handle th ; 728 struct reiserfs_transaction_handle th ;
729 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 729 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
730 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); 730 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb));
731 int locked; 731 int locked;
732 732
733#ifdef DISPLACE_NEW_PACKING_LOCALITIES 733#ifdef DISPLACE_NEW_PACKING_LOCALITIES
@@ -829,8 +829,10 @@ static int reiserfs_rmdir (struct inode * dir, struct dentry *dentry)
829 829
830 830
831 /* we will be doing 2 balancings and update 2 stat data, we change quotas 831 /* we will be doing 2 balancings and update 2 stat data, we change quotas
832 * of the owner of the directory and of the owner of the parent directory */ 832 * of the owner of the directory and of the owner of the parent directory.
833 jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); 833 * The quota structure is possibly deleted only on last iput => outside
834 * of this transaction */
835 jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
834 836
835 reiserfs_write_lock(dir->i_sb); 837 reiserfs_write_lock(dir->i_sb);
836 retval = journal_begin(&th, dir->i_sb, jbegin_count) ; 838 retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
@@ -913,9 +915,10 @@ static int reiserfs_unlink (struct inode * dir, struct dentry *dentry)
913 inode = dentry->d_inode; 915 inode = dentry->d_inode;
914 916
915 /* in this transaction we can be doing at max two balancings and update 917 /* in this transaction we can be doing at max two balancings and update
916 two stat datas, we change quotas of the owner of the directory and of 918 * two stat datas, we change quotas of the owner of the directory and of
917 the owner of the parent directory */ 919 * the owner of the parent directory. The quota structure is possibly
918 jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); 920 * deleted only on iput => outside of this transaction */
921 jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
919 922
920 reiserfs_write_lock(dir->i_sb); 923 reiserfs_write_lock(dir->i_sb);
921 retval = journal_begin(&th, dir->i_sb, jbegin_count) ; 924 retval = journal_begin(&th, dir->i_sb, jbegin_count) ;
@@ -1000,7 +1003,7 @@ static int reiserfs_symlink (struct inode * parent_dir,
1000 struct reiserfs_transaction_handle th ; 1003 struct reiserfs_transaction_handle th ;
1001 int mode = S_IFLNK | S_IRWXUGO; 1004 int mode = S_IFLNK | S_IRWXUGO;
1002 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ 1005 /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
1003 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS+REISERFS_QUOTA_TRANS_BLOCKS); 1006 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * (REISERFS_QUOTA_INIT_BLOCKS(parent_dir->i_sb)+REISERFS_QUOTA_TRANS_BLOCKS(parent_dir->i_sb));
1004 1007
1005 if (!(inode = new_inode(parent_dir->i_sb))) { 1008 if (!(inode = new_inode(parent_dir->i_sb))) {
1006 return -ENOMEM ; 1009 return -ENOMEM ;
@@ -1076,7 +1079,7 @@ static int reiserfs_link (struct dentry * old_dentry, struct inode * dir, struct
1076 struct inode *inode = old_dentry->d_inode; 1079 struct inode *inode = old_dentry->d_inode;
1077 struct reiserfs_transaction_handle th ; 1080 struct reiserfs_transaction_handle th ;
1078 /* We need blocks for transaction + update of quotas for the owners of the directory */ 1081 /* We need blocks for transaction + update of quotas for the owners of the directory */
1079 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS; 1082 int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 2 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
1080 1083
1081 reiserfs_write_lock(dir->i_sb); 1084 reiserfs_write_lock(dir->i_sb);
1082 if (inode->i_nlink >= REISERFS_LINK_MAX) { 1085 if (inode->i_nlink >= REISERFS_LINK_MAX) {
@@ -1196,7 +1199,7 @@ static int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry,
1196 pointed initially and (5) maybe block containing ".." of 1199 pointed initially and (5) maybe block containing ".." of
1197 renamed directory 1200 renamed directory
1198 quota updates: two parent directories */ 1201 quota updates: two parent directories */
1199 jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS; 1202 jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 5 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(old_dir->i_sb);
1200 1203
1201 old_inode = old_dentry->d_inode; 1204 old_inode = old_dentry->d_inode;
1202 new_dentry_inode = new_dentry->d_inode; 1205 new_dentry_inode = new_dentry->d_inode;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index c47f8fd31a2d..63158491e152 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -223,7 +223,7 @@ extern struct tree_balance * cur_tb;
223const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}}; 223const struct reiserfs_key MIN_KEY = {0, 0, {{0, 0},}};
224 224
225/* Maximal possible key. It is never in the tree. */ 225/* Maximal possible key. It is never in the tree. */
226const struct reiserfs_key MAX_KEY = { 226static const struct reiserfs_key MAX_KEY = {
227 __constant_cpu_to_le32(0xffffffff), 227 __constant_cpu_to_le32(0xffffffff),
228 __constant_cpu_to_le32(0xffffffff), 228 __constant_cpu_to_le32(0xffffffff),
229 {{__constant_cpu_to_le32(0xffffffff), 229 {{__constant_cpu_to_le32(0xffffffff),
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index b35b87744983..660aefca1fd2 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -866,8 +866,9 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
866 {"jdev", .arg_required = 'j', .values = NULL}, 866 {"jdev", .arg_required = 'j', .values = NULL},
867 {"nolargeio", .arg_required = 'w', .values = NULL}, 867 {"nolargeio", .arg_required = 'w', .values = NULL},
868 {"commit", .arg_required = 'c', .values = NULL}, 868 {"commit", .arg_required = 'c', .values = NULL},
869 {"usrquota",}, 869 {"usrquota", .setmask = 1<<REISERFS_QUOTA},
870 {"grpquota",}, 870 {"grpquota", .setmask = 1<<REISERFS_QUOTA},
871 {"noquota", .clrmask = 1<<REISERFS_QUOTA},
871 {"errors", .arg_required = 'e', .values = error_actions}, 872 {"errors", .arg_required = 'e', .values = error_actions},
872 {"usrjquota", .arg_required = 'u'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL}, 873 {"usrjquota", .arg_required = 'u'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL},
873 {"grpjquota", .arg_required = 'g'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL}, 874 {"grpjquota", .arg_required = 'g'|(1<<REISERFS_OPT_ALLOWEMPTY), .values = NULL},
@@ -964,6 +965,7 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
964 return 0; 965 return 0;
965 } 966 }
966 strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); 967 strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg);
968 *mount_options |= 1<<REISERFS_QUOTA;
967 } 969 }
968 else { 970 else {
969 if (REISERFS_SB(s)->s_qf_names[qtype]) { 971 if (REISERFS_SB(s)->s_qf_names[qtype]) {
@@ -995,7 +997,13 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
995 reiserfs_warning(s, "reiserfs_parse_options: journalled quota format not specified."); 997 reiserfs_warning(s, "reiserfs_parse_options: journalled quota format not specified.");
996 return 0; 998 return 0;
997 } 999 }
1000 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
1001 if (!(*mount_options & (1<<REISERFS_QUOTA)) && sb_any_quota_enabled(s)) {
1002 reiserfs_warning(s, "reiserfs_parse_options: quota options must be present when quota is turned on.");
1003 return 0;
1004 }
998#endif 1005#endif
1006
999 return 1; 1007 return 1;
1000} 1008}
1001 1009
@@ -1105,6 +1113,7 @@ static int reiserfs_remount (struct super_block * s, int * mount_flags, char * a
1105 safe_mask |= 1 << REISERFS_ERROR_RO; 1113 safe_mask |= 1 << REISERFS_ERROR_RO;
1106 safe_mask |= 1 << REISERFS_ERROR_CONTINUE; 1114 safe_mask |= 1 << REISERFS_ERROR_CONTINUE;
1107 safe_mask |= 1 << REISERFS_ERROR_PANIC; 1115 safe_mask |= 1 << REISERFS_ERROR_PANIC;
1116 safe_mask |= 1 << REISERFS_QUOTA;
1108 1117
1109 /* Update the bitmask, taking care to keep 1118 /* Update the bitmask, taking care to keep
1110 * the bits we're not allowed to change here */ 1119 * the bits we're not allowed to change here */
@@ -1841,13 +1850,18 @@ static int reiserfs_statfs (struct super_block * s, struct kstatfs * buf)
1841static int reiserfs_dquot_initialize(struct inode *inode, int type) 1850static int reiserfs_dquot_initialize(struct inode *inode, int type)
1842{ 1851{
1843 struct reiserfs_transaction_handle th; 1852 struct reiserfs_transaction_handle th;
1844 int ret; 1853 int ret, err;
1845 1854
1846 /* We may create quota structure so we need to reserve enough blocks */ 1855 /* We may create quota structure so we need to reserve enough blocks */
1847 reiserfs_write_lock(inode->i_sb); 1856 reiserfs_write_lock(inode->i_sb);
1848 journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); 1857 ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
1858 if (ret)
1859 goto out;
1849 ret = dquot_initialize(inode, type); 1860 ret = dquot_initialize(inode, type);
1850 journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); 1861 err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb));
1862 if (!ret && err)
1863 ret = err;
1864out:
1851 reiserfs_write_unlock(inode->i_sb); 1865 reiserfs_write_unlock(inode->i_sb);
1852 return ret; 1866 return ret;
1853} 1867}
@@ -1855,13 +1869,18 @@ static int reiserfs_dquot_initialize(struct inode *inode, int type)
1855static int reiserfs_dquot_drop(struct inode *inode) 1869static int reiserfs_dquot_drop(struct inode *inode)
1856{ 1870{
1857 struct reiserfs_transaction_handle th; 1871 struct reiserfs_transaction_handle th;
1858 int ret; 1872 int ret, err;
1859 1873
1860 /* We may delete quota structure so we need to reserve enough blocks */ 1874 /* We may delete quota structure so we need to reserve enough blocks */
1861 reiserfs_write_lock(inode->i_sb); 1875 reiserfs_write_lock(inode->i_sb);
1862 journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); 1876 ret = journal_begin(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
1877 if (ret)
1878 goto out;
1863 ret = dquot_drop(inode); 1879 ret = dquot_drop(inode);
1864 journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_INIT_BLOCKS); 1880 err = journal_end(&th, inode->i_sb, 2*REISERFS_QUOTA_DEL_BLOCKS(inode->i_sb));
1881 if (!ret && err)
1882 ret = err;
1883out:
1865 reiserfs_write_unlock(inode->i_sb); 1884 reiserfs_write_unlock(inode->i_sb);
1866 return ret; 1885 return ret;
1867} 1886}
@@ -1869,12 +1888,17 @@ static int reiserfs_dquot_drop(struct inode *inode)
1869static int reiserfs_write_dquot(struct dquot *dquot) 1888static int reiserfs_write_dquot(struct dquot *dquot)
1870{ 1889{
1871 struct reiserfs_transaction_handle th; 1890 struct reiserfs_transaction_handle th;
1872 int ret; 1891 int ret, err;
1873 1892
1874 reiserfs_write_lock(dquot->dq_sb); 1893 reiserfs_write_lock(dquot->dq_sb);
1875 journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS); 1894 ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1895 if (ret)
1896 goto out;
1876 ret = dquot_commit(dquot); 1897 ret = dquot_commit(dquot);
1877 journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS); 1898 err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1899 if (!ret && err)
1900 ret = err;
1901out:
1878 reiserfs_write_unlock(dquot->dq_sb); 1902 reiserfs_write_unlock(dquot->dq_sb);
1879 return ret; 1903 return ret;
1880} 1904}
@@ -1882,12 +1906,17 @@ static int reiserfs_write_dquot(struct dquot *dquot)
1882static int reiserfs_acquire_dquot(struct dquot *dquot) 1906static int reiserfs_acquire_dquot(struct dquot *dquot)
1883{ 1907{
1884 struct reiserfs_transaction_handle th; 1908 struct reiserfs_transaction_handle th;
1885 int ret; 1909 int ret, err;
1886 1910
1887 reiserfs_write_lock(dquot->dq_sb); 1911 reiserfs_write_lock(dquot->dq_sb);
1888 journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); 1912 ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1913 if (ret)
1914 goto out;
1889 ret = dquot_acquire(dquot); 1915 ret = dquot_acquire(dquot);
1890 journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); 1916 err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1917 if (!ret && err)
1918 ret = err;
1919out:
1891 reiserfs_write_unlock(dquot->dq_sb); 1920 reiserfs_write_unlock(dquot->dq_sb);
1892 return ret; 1921 return ret;
1893} 1922}
@@ -1895,12 +1924,17 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
1895static int reiserfs_release_dquot(struct dquot *dquot) 1924static int reiserfs_release_dquot(struct dquot *dquot)
1896{ 1925{
1897 struct reiserfs_transaction_handle th; 1926 struct reiserfs_transaction_handle th;
1898 int ret; 1927 int ret, err;
1899 1928
1900 reiserfs_write_lock(dquot->dq_sb); 1929 reiserfs_write_lock(dquot->dq_sb);
1901 journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); 1930 ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1931 if (ret)
1932 goto out;
1902 ret = dquot_release(dquot); 1933 ret = dquot_release(dquot);
1903 journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS); 1934 err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
1935 if (!ret && err)
1936 ret = err;
1937out:
1904 reiserfs_write_unlock(dquot->dq_sb); 1938 reiserfs_write_unlock(dquot->dq_sb);
1905 return ret; 1939 return ret;
1906} 1940}
@@ -1920,39 +1954,29 @@ static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
1920static int reiserfs_write_info(struct super_block *sb, int type) 1954static int reiserfs_write_info(struct super_block *sb, int type)
1921{ 1955{
1922 struct reiserfs_transaction_handle th; 1956 struct reiserfs_transaction_handle th;
1923 int ret; 1957 int ret, err;
1924 1958
1925 /* Data block + inode block */ 1959 /* Data block + inode block */
1926 reiserfs_write_lock(sb); 1960 reiserfs_write_lock(sb);
1927 journal_begin(&th, sb, 2); 1961 ret = journal_begin(&th, sb, 2);
1962 if (ret)
1963 goto out;
1928 ret = dquot_commit_info(sb, type); 1964 ret = dquot_commit_info(sb, type);
1929 journal_end(&th, sb, 2); 1965 err = journal_end(&th, sb, 2);
1966 if (!ret && err)
1967 ret = err;
1968out:
1930 reiserfs_write_unlock(sb); 1969 reiserfs_write_unlock(sb);
1931 return ret; 1970 return ret;
1932} 1971}
1933 1972
1934/* 1973/*
1935 * Turn on quotas during mount time - we need to find 1974 * Turn on quotas during mount time - we need to find the quota file and such...
1936 * the quota file and such...
1937 */ 1975 */
1938static int reiserfs_quota_on_mount(struct super_block *sb, int type) 1976static int reiserfs_quota_on_mount(struct super_block *sb, int type)
1939{ 1977{
1940 int err; 1978 return vfs_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type],
1941 struct dentry *dentry; 1979 REISERFS_SB(sb)->s_jquota_fmt, type);
1942 struct qstr name = { .name = REISERFS_SB(sb)->s_qf_names[type],
1943 .hash = 0,
1944 .len = strlen(REISERFS_SB(sb)->s_qf_names[type])};
1945
1946 dentry = lookup_hash(&name, sb->s_root);
1947 if (IS_ERR(dentry))
1948 return PTR_ERR(dentry);
1949 err = vfs_quota_on_mount(type, REISERFS_SB(sb)->s_jquota_fmt, dentry);
1950 /* Now invalidate and put the dentry - quota got its own reference
1951 * to inode and dentry has at least wrong hash so we had better
1952 * throw it away */
1953 d_invalidate(dentry);
1954 dput(dentry);
1955 return err;
1956} 1980}
1957 1981
1958/* 1982/*
@@ -1963,6 +1987,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, ch
1963 int err; 1987 int err;
1964 struct nameidata nd; 1988 struct nameidata nd;
1965 1989
1990 if (!(REISERFS_SB(sb)->s_mount_opt & (1<<REISERFS_QUOTA)))
1991 return -EINVAL;
1966 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 1992 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
1967 if (err) 1993 if (err)
1968 return err; 1994 return err;
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index e302071903a1..c312881c5f53 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -4,7 +4,7 @@
4#include <linux/errno.h> 4#include <linux/errno.h>
5#include <linux/pagemap.h> 5#include <linux/pagemap.h>
6#include <linux/xattr.h> 6#include <linux/xattr.h>
7#include <linux/xattr_acl.h> 7#include <linux/posix_acl_xattr.h>
8#include <linux/reiserfs_xattr.h> 8#include <linux/reiserfs_xattr.h>
9#include <linux/reiserfs_acl.h> 9#include <linux/reiserfs_acl.h>
10#include <asm/uaccess.h> 10#include <asm/uaccess.h>
@@ -192,11 +192,11 @@ reiserfs_get_acl(struct inode *inode, int type)
192 192
193 switch (type) { 193 switch (type) {
194 case ACL_TYPE_ACCESS: 194 case ACL_TYPE_ACCESS:
195 name = XATTR_NAME_ACL_ACCESS; 195 name = POSIX_ACL_XATTR_ACCESS;
196 p_acl = &reiserfs_i->i_acl_access; 196 p_acl = &reiserfs_i->i_acl_access;
197 break; 197 break;
198 case ACL_TYPE_DEFAULT: 198 case ACL_TYPE_DEFAULT:
199 name = XATTR_NAME_ACL_DEFAULT; 199 name = POSIX_ACL_XATTR_DEFAULT;
200 p_acl = &reiserfs_i->i_acl_default; 200 p_acl = &reiserfs_i->i_acl_default;
201 break; 201 break;
202 default: 202 default:
@@ -260,7 +260,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
260 260
261 switch (type) { 261 switch (type) {
262 case ACL_TYPE_ACCESS: 262 case ACL_TYPE_ACCESS:
263 name = XATTR_NAME_ACL_ACCESS; 263 name = POSIX_ACL_XATTR_ACCESS;
264 p_acl = &reiserfs_i->i_acl_access; 264 p_acl = &reiserfs_i->i_acl_access;
265 if (acl) { 265 if (acl) {
266 mode_t mode = inode->i_mode; 266 mode_t mode = inode->i_mode;
@@ -275,7 +275,7 @@ reiserfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
275 } 275 }
276 break; 276 break;
277 case ACL_TYPE_DEFAULT: 277 case ACL_TYPE_DEFAULT:
278 name = XATTR_NAME_ACL_DEFAULT; 278 name = POSIX_ACL_XATTR_DEFAULT;
279 p_acl = &reiserfs_i->i_acl_default; 279 p_acl = &reiserfs_i->i_acl_default;
280 if (!S_ISDIR (inode->i_mode)) 280 if (!S_ISDIR (inode->i_mode))
281 return acl ? -EACCES : 0; 281 return acl ? -EACCES : 0;
@@ -468,7 +468,7 @@ static int
468posix_acl_access_get(struct inode *inode, const char *name, 468posix_acl_access_get(struct inode *inode, const char *name,
469 void *buffer, size_t size) 469 void *buffer, size_t size)
470{ 470{
471 if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) 471 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
472 return -EINVAL; 472 return -EINVAL;
473 return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); 473 return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
474} 474}
@@ -477,7 +477,7 @@ static int
477posix_acl_access_set(struct inode *inode, const char *name, 477posix_acl_access_set(struct inode *inode, const char *name,
478 const void *value, size_t size, int flags) 478 const void *value, size_t size, int flags)
479{ 479{
480 if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) 480 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
481 return -EINVAL; 481 return -EINVAL;
482 return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); 482 return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
483} 483}
@@ -487,7 +487,7 @@ posix_acl_access_del (struct inode *inode, const char *name)
487{ 487{
488 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 488 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
489 struct posix_acl **acl = &reiserfs_i->i_acl_access; 489 struct posix_acl **acl = &reiserfs_i->i_acl_access;
490 if (strlen(name) != sizeof(XATTR_NAME_ACL_ACCESS)-1) 490 if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS)-1)
491 return -EINVAL; 491 return -EINVAL;
492 if (!IS_ERR (*acl) && *acl) { 492 if (!IS_ERR (*acl) && *acl) {
493 posix_acl_release (*acl); 493 posix_acl_release (*acl);
@@ -510,7 +510,7 @@ posix_acl_access_list (struct inode *inode, const char *name, int namelen, char
510} 510}
511 511
512struct reiserfs_xattr_handler posix_acl_access_handler = { 512struct reiserfs_xattr_handler posix_acl_access_handler = {
513 .prefix = XATTR_NAME_ACL_ACCESS, 513 .prefix = POSIX_ACL_XATTR_ACCESS,
514 .get = posix_acl_access_get, 514 .get = posix_acl_access_get,
515 .set = posix_acl_access_set, 515 .set = posix_acl_access_set,
516 .del = posix_acl_access_del, 516 .del = posix_acl_access_del,
@@ -521,7 +521,7 @@ static int
521posix_acl_default_get (struct inode *inode, const char *name, 521posix_acl_default_get (struct inode *inode, const char *name,
522 void *buffer, size_t size) 522 void *buffer, size_t size)
523{ 523{
524 if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) 524 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
525 return -EINVAL; 525 return -EINVAL;
526 return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); 526 return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
527} 527}
@@ -530,7 +530,7 @@ static int
530posix_acl_default_set(struct inode *inode, const char *name, 530posix_acl_default_set(struct inode *inode, const char *name,
531 const void *value, size_t size, int flags) 531 const void *value, size_t size, int flags)
532{ 532{
533 if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) 533 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
534 return -EINVAL; 534 return -EINVAL;
535 return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); 535 return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
536} 536}
@@ -540,7 +540,7 @@ posix_acl_default_del (struct inode *inode, const char *name)
540{ 540{
541 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode); 541 struct reiserfs_inode_info *reiserfs_i = REISERFS_I(inode);
542 struct posix_acl **acl = &reiserfs_i->i_acl_default; 542 struct posix_acl **acl = &reiserfs_i->i_acl_default;
543 if (strlen(name) != sizeof(XATTR_NAME_ACL_DEFAULT)-1) 543 if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT)-1)
544 return -EINVAL; 544 return -EINVAL;
545 if (!IS_ERR (*acl) && *acl) { 545 if (!IS_ERR (*acl) && *acl) {
546 posix_acl_release (*acl); 546 posix_acl_release (*acl);
@@ -563,7 +563,7 @@ posix_acl_default_list (struct inode *inode, const char *name, int namelen, char
563} 563}
564 564
565struct reiserfs_xattr_handler posix_acl_default_handler = { 565struct reiserfs_xattr_handler posix_acl_default_handler = {
566 .prefix = XATTR_NAME_ACL_DEFAULT, 566 .prefix = POSIX_ACL_XATTR_DEFAULT,
567 .get = posix_acl_default_get, 567 .get = posix_acl_default_get,
568 .set = posix_acl_default_set, 568 .set = posix_acl_default_set,
569 .del = posix_acl_default_del, 569 .del = posix_acl_default_del,
diff --git a/fs/super.c b/fs/super.c
index 573bcc81bb82..25bc1ec6bc5d 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -341,20 +341,22 @@ static inline void write_super(struct super_block *sb)
341 */ 341 */
342void sync_supers(void) 342void sync_supers(void)
343{ 343{
344 struct super_block * sb; 344 struct super_block *sb;
345restart: 345
346 spin_lock(&sb_lock); 346 spin_lock(&sb_lock);
347 sb = sb_entry(super_blocks.next); 347restart:
348 while (sb != sb_entry(&super_blocks)) 348 list_for_each_entry(sb, &super_blocks, s_list) {
349 if (sb->s_dirt) { 349 if (sb->s_dirt) {
350 sb->s_count++; 350 sb->s_count++;
351 spin_unlock(&sb_lock); 351 spin_unlock(&sb_lock);
352 down_read(&sb->s_umount); 352 down_read(&sb->s_umount);
353 write_super(sb); 353 write_super(sb);
354 drop_super(sb); 354 up_read(&sb->s_umount);
355 goto restart; 355 spin_lock(&sb_lock);
356 } else 356 if (__put_super_and_need_restart(sb))
357 sb = sb_entry(sb->s_list.next); 357 goto restart;
358 }
359 }
358 spin_unlock(&sb_lock); 360 spin_unlock(&sb_lock);
359} 361}
360 362
@@ -381,20 +383,16 @@ void sync_filesystems(int wait)
381 383
382 down(&mutex); /* Could be down_interruptible */ 384 down(&mutex); /* Could be down_interruptible */
383 spin_lock(&sb_lock); 385 spin_lock(&sb_lock);
384 for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks); 386 list_for_each_entry(sb, &super_blocks, s_list) {
385 sb = sb_entry(sb->s_list.next)) {
386 if (!sb->s_op->sync_fs) 387 if (!sb->s_op->sync_fs)
387 continue; 388 continue;
388 if (sb->s_flags & MS_RDONLY) 389 if (sb->s_flags & MS_RDONLY)
389 continue; 390 continue;
390 sb->s_need_sync_fs = 1; 391 sb->s_need_sync_fs = 1;
391 } 392 }
392 spin_unlock(&sb_lock);
393 393
394restart: 394restart:
395 spin_lock(&sb_lock); 395 list_for_each_entry(sb, &super_blocks, s_list) {
396 for (sb = sb_entry(super_blocks.next); sb != sb_entry(&super_blocks);
397 sb = sb_entry(sb->s_list.next)) {
398 if (!sb->s_need_sync_fs) 396 if (!sb->s_need_sync_fs)
399 continue; 397 continue;
400 sb->s_need_sync_fs = 0; 398 sb->s_need_sync_fs = 0;
@@ -405,8 +403,11 @@ restart:
405 down_read(&sb->s_umount); 403 down_read(&sb->s_umount);
406 if (sb->s_root && (wait || sb->s_dirt)) 404 if (sb->s_root && (wait || sb->s_dirt))
407 sb->s_op->sync_fs(sb, wait); 405 sb->s_op->sync_fs(sb, wait);
408 drop_super(sb); 406 up_read(&sb->s_umount);
409 goto restart; 407 /* restart only when sb is no longer on the list */
408 spin_lock(&sb_lock);
409 if (__put_super_and_need_restart(sb))
410 goto restart;
410 } 411 }
411 spin_unlock(&sb_lock); 412 spin_unlock(&sb_lock);
412 up(&mutex); 413 up(&mutex);
@@ -422,21 +423,25 @@ restart:
422 423
423struct super_block * get_super(struct block_device *bdev) 424struct super_block * get_super(struct block_device *bdev)
424{ 425{
425 struct list_head *p; 426 struct super_block *sb;
427
426 if (!bdev) 428 if (!bdev)
427 return NULL; 429 return NULL;
428rescan: 430
429 spin_lock(&sb_lock); 431 spin_lock(&sb_lock);
430 list_for_each(p, &super_blocks) { 432rescan:
431 struct super_block *s = sb_entry(p); 433 list_for_each_entry(sb, &super_blocks, s_list) {
432 if (s->s_bdev == bdev) { 434 if (sb->s_bdev == bdev) {
433 s->s_count++; 435 sb->s_count++;
434 spin_unlock(&sb_lock); 436 spin_unlock(&sb_lock);
435 down_read(&s->s_umount); 437 down_read(&sb->s_umount);
436 if (s->s_root) 438 if (sb->s_root)
437 return s; 439 return sb;
438 drop_super(s); 440 up_read(&sb->s_umount);
439 goto rescan; 441 /* restart only when sb is no longer on the list */
442 spin_lock(&sb_lock);
443 if (__put_super_and_need_restart(sb))
444 goto rescan;
440 } 445 }
441 } 446 }
442 spin_unlock(&sb_lock); 447 spin_unlock(&sb_lock);
@@ -447,20 +452,22 @@ EXPORT_SYMBOL(get_super);
447 452
448struct super_block * user_get_super(dev_t dev) 453struct super_block * user_get_super(dev_t dev)
449{ 454{
450 struct list_head *p; 455 struct super_block *sb;
451 456
452rescan:
453 spin_lock(&sb_lock); 457 spin_lock(&sb_lock);
454 list_for_each(p, &super_blocks) { 458rescan:
455 struct super_block *s = sb_entry(p); 459 list_for_each_entry(sb, &super_blocks, s_list) {
456 if (s->s_dev == dev) { 460 if (sb->s_dev == dev) {
457 s->s_count++; 461 sb->s_count++;
458 spin_unlock(&sb_lock); 462 spin_unlock(&sb_lock);
459 down_read(&s->s_umount); 463 down_read(&sb->s_umount);
460 if (s->s_root) 464 if (sb->s_root)
461 return s; 465 return sb;
462 drop_super(s); 466 up_read(&sb->s_umount);
463 goto rescan; 467 /* restart only when sb is no longer on the list */
468 spin_lock(&sb_lock);
469 if (__put_super_and_need_restart(sb))
470 goto rescan;
464 } 471 }
465 } 472 }
466 spin_unlock(&sb_lock); 473 spin_unlock(&sb_lock);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 37d7a6875d86..59734ba1ee60 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -8,6 +8,7 @@
8#include <linux/mount.h> 8#include <linux/mount.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/kobject.h> 10#include <linux/kobject.h>
11#include <linux/namei.h>
11#include "sysfs.h" 12#include "sysfs.h"
12 13
13DECLARE_RWSEM(sysfs_rename_sem); 14DECLARE_RWSEM(sysfs_rename_sem);
@@ -99,7 +100,7 @@ static int create_dir(struct kobject * k, struct dentry * p,
99 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; 100 umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
100 101
101 down(&p->d_inode->i_sem); 102 down(&p->d_inode->i_sem);
102 *d = sysfs_get_dentry(p,n); 103 *d = lookup_one_len(n, p, strlen(n));
103 if (!IS_ERR(*d)) { 104 if (!IS_ERR(*d)) {
104 error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR); 105 error = sysfs_make_dirent(p->d_fsdata, *d, k, mode, SYSFS_DIR);
105 if (!error) { 106 if (!error) {
@@ -315,7 +316,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
315 316
316 down(&parent->d_inode->i_sem); 317 down(&parent->d_inode->i_sem);
317 318
318 new_dentry = sysfs_get_dentry(parent, new_name); 319 new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
319 if (!IS_ERR(new_dentry)) { 320 if (!IS_ERR(new_dentry)) {
320 if (!new_dentry->d_inode) { 321 if (!new_dentry->d_inode) {
321 error = kobject_set_name(kobj, "%s", new_name); 322 error = kobject_set_name(kobj, "%s", new_name);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 849aac115460..d72c1ce48559 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -5,6 +5,7 @@
5#include <linux/module.h> 5#include <linux/module.h>
6#include <linux/dnotify.h> 6#include <linux/dnotify.h>
7#include <linux/kobject.h> 7#include <linux/kobject.h>
8#include <linux/namei.h>
8#include <asm/uaccess.h> 9#include <asm/uaccess.h>
9#include <asm/semaphore.h> 10#include <asm/semaphore.h>
10 11
@@ -13,7 +14,7 @@
13#define to_subsys(k) container_of(k,struct subsystem,kset.kobj) 14#define to_subsys(k) container_of(k,struct subsystem,kset.kobj)
14#define to_sattr(a) container_of(a,struct subsys_attribute,attr) 15#define to_sattr(a) container_of(a,struct subsys_attribute,attr)
15 16
16/** 17/*
17 * Subsystem file operations. 18 * Subsystem file operations.
18 * These operations allow subsystems to have files that can be 19 * These operations allow subsystems to have files that can be
19 * read/written. 20 * read/written.
@@ -191,8 +192,9 @@ fill_write_buffer(struct sysfs_buffer * buffer, const char __user * buf, size_t
191 192
192/** 193/**
193 * flush_write_buffer - push buffer to kobject. 194 * flush_write_buffer - push buffer to kobject.
194 * @file: file pointer. 195 * @dentry: dentry to the attribute
195 * @buffer: data buffer for file. 196 * @buffer: data buffer for file.
197 * @count: number of bytes
196 * 198 *
197 * Get the correct pointers for the kobject and the attribute we're 199 * Get the correct pointers for the kobject and the attribute we're
198 * dealing with, then call the store() method for the attribute, 200 * dealing with, then call the store() method for the attribute,
@@ -400,7 +402,7 @@ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
400 int res = -ENOENT; 402 int res = -ENOENT;
401 403
402 down(&dir->d_inode->i_sem); 404 down(&dir->d_inode->i_sem);
403 victim = sysfs_get_dentry(dir, attr->name); 405 victim = lookup_one_len(attr->name, dir, strlen(attr->name));
404 if (!IS_ERR(victim)) { 406 if (!IS_ERR(victim)) {
405 /* make sure dentry is really there */ 407 /* make sure dentry is really there */
406 if (victim->d_inode && 408 if (victim->d_inode &&
@@ -443,7 +445,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
443 int res = -ENOENT; 445 int res = -ENOENT;
444 446
445 down(&dir->d_inode->i_sem); 447 down(&dir->d_inode->i_sem);
446 victim = sysfs_get_dentry(dir, attr->name); 448 victim = lookup_one_len(attr->name, dir, strlen(attr->name));
447 if (!IS_ERR(victim)) { 449 if (!IS_ERR(victim)) {
448 if (victim->d_inode && 450 if (victim->d_inode &&
449 (victim->d_parent->d_inode == dir->d_inode)) { 451 (victim->d_parent->d_inode == dir->d_inode)) {
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index f11ac5ea7021..122145b0895c 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -11,6 +11,7 @@
11#include <linux/kobject.h> 11#include <linux/kobject.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/dcache.h> 13#include <linux/dcache.h>
14#include <linux/namei.h>
14#include <linux/err.h> 15#include <linux/err.h>
15#include "sysfs.h" 16#include "sysfs.h"
16 17
@@ -68,7 +69,8 @@ void sysfs_remove_group(struct kobject * kobj,
68 struct dentry * dir; 69 struct dentry * dir;
69 70
70 if (grp->name) 71 if (grp->name)
71 dir = sysfs_get_dentry(kobj->dentry,grp->name); 72 dir = lookup_one_len(grp->name, kobj->dentry,
73 strlen(grp->name));
72 else 74 else
73 dir = dget(kobj->dentry); 75 dir = dget(kobj->dentry);
74 76
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 565cac1d4200..8de13bafaa76 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -166,16 +166,6 @@ int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
166 return error; 166 return error;
167} 167}
168 168
169struct dentry * sysfs_get_dentry(struct dentry * parent, const char * name)
170{
171 struct qstr qstr;
172
173 qstr.name = name;
174 qstr.len = strlen(name);
175 qstr.hash = full_name_hash(name,qstr.len);
176 return lookup_hash(&qstr,parent);
177}
178
179/* 169/*
180 * Get the name for corresponding element represented by the given sysfs_dirent 170 * Get the name for corresponding element represented by the given sysfs_dirent
181 */ 171 */
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 29da6f5f07c8..3f8953e0e5d0 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -7,7 +7,6 @@ extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
7 7
8extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *, 8extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
9 umode_t, int); 9 umode_t, int);
10extern struct dentry * sysfs_get_dentry(struct dentry *, const char *);
11 10
12extern int sysfs_add_file(struct dentry *, const struct attribute *, int); 11extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
13extern void sysfs_hash_and_remove(struct dentry * dir, const char * name); 12extern void sysfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 93ce257cd149..a3a4b5aaf5d9 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -149,11 +149,12 @@ linvfs_unwritten_convert(
149 */ 149 */
150STATIC void 150STATIC void
151linvfs_unwritten_convert_direct( 151linvfs_unwritten_convert_direct(
152 struct inode *inode, 152 struct kiocb *iocb,
153 loff_t offset, 153 loff_t offset,
154 ssize_t size, 154 ssize_t size,
155 void *private) 155 void *private)
156{ 156{
157 struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
157 ASSERT(!private || inode == (struct inode *)private); 158 ASSERT(!private || inode == (struct inode *)private);
158 159
159 /* private indicates an unwritten extent lay beneath this IO */ 160 /* private indicates an unwritten extent lay beneath this IO */