aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig2
-rw-r--r--fs/binfmt_elf.c28
-rw-r--r--fs/block_dev.c29
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/char_dev.c40
-rw-r--r--fs/cifs/CHANGES5
-rw-r--r--fs/cifs/cifs_spnego.c2
-rw-r--r--fs/cifs/cifsacl.c4
-rw-r--r--fs/cifs/cifsencrypt.c1
-rw-r--r--fs/cifs/cifsfs.c22
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h21
-rw-r--r--fs/cifs/cifssmb.c316
-rw-r--r--fs/cifs/connect.c49
-rw-r--r--fs/cifs/dir.c2
-rw-r--r--fs/cifs/file.c43
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/transport.c17
-rw-r--r--fs/configfs/inode.c1
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/dlm/netlink.c2
-rw-r--r--fs/ext2/acl.c8
-rw-r--r--fs/ext2/acl.h4
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/namei.c4
-rw-r--r--fs/ext3/acl.c8
-rw-r--r--fs/ext3/acl.h4
-rw-r--r--fs/ext3/file.c63
-rw-r--r--fs/ext3/namei.c4
-rw-r--r--fs/ext4/acl.c8
-rw-r--r--fs/ext4/acl.h4
-rw-r--r--fs/ext4/file.c55
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/fat/file.c22
-rw-r--r--fs/fat/misc.c4
-rw-r--r--fs/fs-writeback.c1119
-rw-r--r--fs/fuse/inode.c1
-rw-r--r--fs/gfs2/Makefile2
-rw-r--r--fs/gfs2/acl.c106
-rw-r--r--fs/gfs2/dentry.c18
-rw-r--r--fs/gfs2/eaops.c157
-rw-r--r--fs/gfs2/eaops.h30
-rw-r--r--fs/gfs2/export.c36
-rw-r--r--fs/gfs2/file.c1
-rw-r--r--fs/gfs2/incore.h15
-rw-r--r--fs/gfs2/inode.c159
-rw-r--r--fs/gfs2/ops_fstype.c66
-rw-r--r--fs/gfs2/ops_inode.c82
-rw-r--r--fs/gfs2/rgrp.c88
-rw-r--r--fs/gfs2/rgrp.h6
-rw-r--r--fs/gfs2/super.c46
-rw-r--r--fs/gfs2/super.h5
-rw-r--r--fs/gfs2/sys.c31
-rw-r--r--fs/gfs2/util.c41
-rw-r--r--fs/gfs2/xattr.c (renamed from fs/gfs2/eattr.c)425
-rw-r--r--fs/gfs2/xattr.h (renamed from fs/gfs2/eattr.h)54
-rw-r--r--fs/hugetlbfs/inode.c1
-rw-r--r--fs/jffs2/acl.c7
-rw-r--r--fs/jffs2/acl.h4
-rw-r--r--fs/jffs2/dir.c2
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/symlink.c2
-rw-r--r--fs/jfs/acl.c7
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/namei.c2
-rw-r--r--fs/lockd/host.c14
-rw-r--r--fs/lockd/mon.c44
-rw-r--r--fs/locks.c4
-rw-r--r--fs/namei.c110
-rw-r--r--fs/nfs/Makefile3
-rw-r--r--fs/nfs/cache_lib.c140
-rw-r--r--fs/nfs/cache_lib.h27
-rw-r--r--fs/nfs/callback.c26
-rw-r--r--fs/nfs/client.c16
-rw-r--r--fs/nfs/direct.c3
-rw-r--r--fs/nfs/dns_resolve.c335
-rw-r--r--fs/nfs/dns_resolve.h14
-rw-r--r--fs/nfs/file.c49
-rw-r--r--fs/nfs/idmap.c6
-rw-r--r--fs/nfs/inode.c100
-rw-r--r--fs/nfs/internal.h39
-rw-r--r--fs/nfs/mount_clnt.c83
-rw-r--r--fs/nfs/nfs3proc.c1
-rw-r--r--fs/nfs/nfs4namespace.c24
-rw-r--r--fs/nfs/nfs4proc.c40
-rw-r--r--fs/nfs/nfs4xdr.c1460
-rw-r--r--fs/nfs/super.c451
-rw-r--r--fs/nfs/write.c91
-rw-r--r--fs/nfsd/auth.c4
-rw-r--r--fs/nfsd/export.c14
-rw-r--r--fs/nfsd/nfs4idmap.c20
-rw-r--r--fs/nfsd/nfsctl.c21
-rw-r--r--fs/nfsd/nfssvc.c2
-rw-r--r--fs/nfsd/vfs.c3
-rw-r--r--fs/nilfs2/Kconfig2
-rw-r--r--fs/nilfs2/bmap.c151
-rw-r--r--fs/nilfs2/bmap.h76
-rw-r--r--fs/nilfs2/btree.c625
-rw-r--r--fs/nilfs2/cpfile.c11
-rw-r--r--fs/nilfs2/cpfile.h2
-rw-r--r--fs/nilfs2/dat.c42
-rw-r--r--fs/nilfs2/dat.h8
-rw-r--r--fs/nilfs2/direct.c161
-rw-r--r--fs/nilfs2/ifile.h1
-rw-r--r--fs/nilfs2/inode.c3
-rw-r--r--fs/nilfs2/ioctl.c26
-rw-r--r--fs/nilfs2/mdt.c40
-rw-r--r--fs/nilfs2/mdt.h3
-rw-r--r--fs/nilfs2/recovery.c3
-rw-r--r--fs/nilfs2/segbuf.c4
-rw-r--r--fs/nilfs2/segment.c7
-rw-r--r--fs/nilfs2/sufile.h1
-rw-r--r--fs/nilfs2/super.c100
-rw-r--r--fs/nilfs2/the_nilfs.c15
-rw-r--r--fs/nilfs2/the_nilfs.h43
-rw-r--r--fs/ntfs/file.c16
-rw-r--r--fs/ntfs/mft.c13
-rw-r--r--fs/ocfs2/dlm/dlmfs.c1
-rw-r--r--fs/ocfs2/file.c49
-rw-r--r--fs/open.c12
-rw-r--r--fs/partitions/check.c12
-rw-r--r--fs/ramfs/inode.c1
-rw-r--r--fs/splice.c30
-rw-r--r--fs/super.c5
-rw-r--r--fs/sync.c76
-rw-r--r--fs/sysfs/dir.c1
-rw-r--r--fs/sysfs/inode.c135
-rw-r--r--fs/sysfs/symlink.c2
-rw-r--r--fs/sysfs/sysfs.h12
-rw-r--r--fs/ubifs/budget.c16
-rw-r--r--fs/ubifs/super.c9
-rw-r--r--fs/udf/directory.c86
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/inode.c19
-rw-r--r--fs/udf/lowlevel.c4
-rw-r--r--fs/udf/namei.c1
-rw-r--r--fs/xattr.c55
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c3
144 files changed, 4675 insertions, 3687 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 0e7da7bb5d93..455aa207e67e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -43,6 +43,7 @@ source "fs/xfs/Kconfig"
43source "fs/gfs2/Kconfig" 43source "fs/gfs2/Kconfig"
44source "fs/ocfs2/Kconfig" 44source "fs/ocfs2/Kconfig"
45source "fs/btrfs/Kconfig" 45source "fs/btrfs/Kconfig"
46source "fs/nilfs2/Kconfig"
46 47
47endif # BLOCK 48endif # BLOCK
48 49
@@ -186,7 +187,6 @@ source "fs/romfs/Kconfig"
186source "fs/sysv/Kconfig" 187source "fs/sysv/Kconfig"
187source "fs/ufs/Kconfig" 188source "fs/ufs/Kconfig"
188source "fs/exofs/Kconfig" 189source "fs/exofs/Kconfig"
189source "fs/nilfs2/Kconfig"
190 190
191endif # MISC_FILESYSTEMS 191endif # MISC_FILESYSTEMS
192 192
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b7c1603cd4bd..7c1e65d54872 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -501,22 +501,22 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
501 } 501 }
502 } 502 }
503 503
504 /* 504 if (last_bss > elf_bss) {
505 * Now fill out the bss section. First pad the last page up 505 /*
506 * to the page boundary, and then perform a mmap to make sure 506 * Now fill out the bss section. First pad the last page up
507 * that there are zero-mapped pages up to and including the 507 * to the page boundary, and then perform a mmap to make sure
508 * last bss page. 508 * that there are zero-mapped pages up to and including the
509 */ 509 * last bss page.
510 if (padzero(elf_bss)) { 510 */
511 error = -EFAULT; 511 if (padzero(elf_bss)) {
512 goto out_close; 512 error = -EFAULT;
513 } 513 goto out_close;
514 }
514 515
515 /* What we have mapped so far */ 516 /* What we have mapped so far */
516 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); 517 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
517 518
518 /* Map the last of the bss segment */ 519 /* Map the last of the bss segment */
519 if (last_bss > elf_bss) {
520 down_write(&current->mm->mmap_sem); 520 down_write(&current->mm->mmap_sem);
521 error = do_brk(elf_bss, last_bss - elf_bss); 521 error = do_brk(elf_bss, last_bss - elf_bss);
522 up_write(&current->mm->mmap_sem); 522 up_write(&current->mm->mmap_sem);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 94dfda24c06e..3581a4e53942 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1405,6 +1405,33 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1405} 1405}
1406 1406
1407/* 1407/*
1408 * Write data to the block device. Only intended for the block device itself
1409 * and the raw driver which basically is a fake block device.
1410 *
1411 * Does not take i_mutex for the write and thus is not for general purpose
1412 * use.
1413 */
1414ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1415 unsigned long nr_segs, loff_t pos)
1416{
1417 struct file *file = iocb->ki_filp;
1418 ssize_t ret;
1419
1420 BUG_ON(iocb->ki_pos != pos);
1421
1422 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1423 if (ret > 0 || ret == -EIOCBQUEUED) {
1424 ssize_t err;
1425
1426 err = generic_write_sync(file, pos, ret);
1427 if (err < 0 && ret > 0)
1428 ret = err;
1429 }
1430 return ret;
1431}
1432EXPORT_SYMBOL_GPL(blkdev_aio_write);
1433
1434/*
1408 * Try to release a page associated with block device when the system 1435 * Try to release a page associated with block device when the system
1409 * is under memory pressure. 1436 * is under memory pressure.
1410 */ 1437 */
@@ -1436,7 +1463,7 @@ const struct file_operations def_blk_fops = {
1436 .read = do_sync_read, 1463 .read = do_sync_read,
1437 .write = do_sync_write, 1464 .write = do_sync_write,
1438 .aio_read = generic_file_aio_read, 1465 .aio_read = generic_file_aio_read,
1439 .aio_write = generic_file_aio_write_nolock, 1466 .aio_write = blkdev_aio_write,
1440 .mmap = generic_file_mmap, 1467 .mmap = generic_file_mmap,
1441 .fsync = block_fsync, 1468 .fsync = block_fsync,
1442 .unlocked_ioctl = block_ioctl, 1469 .unlocked_ioctl = block_ioctl,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e83be2e4602c..15831d5c7367 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1352,6 +1352,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1352{ 1352{
1353 int err; 1353 int err;
1354 1354
1355 bdi->name = "btrfs";
1355 bdi->capabilities = BDI_CAP_MAP_COPY; 1356 bdi->capabilities = BDI_CAP_MAP_COPY;
1356 err = bdi_init(bdi); 1357 err = bdi_init(bdi);
1357 if (err) 1358 if (err)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72a2b9c28e9f..535f85ba104f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1511,7 +1511,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1511static void btrfs_issue_discard(struct block_device *bdev, 1511static void btrfs_issue_discard(struct block_device *bdev,
1512 u64 start, u64 len) 1512 u64 start, u64 len)
1513{ 1513{
1514 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); 1514 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
1515 DISCARD_FL_BARRIER);
1515} 1516}
1516#endif 1517#endif
1517 1518
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5dbefd11b4af..5cf405b0828d 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -260,7 +260,7 @@ loop_lock:
260 num_run++; 260 num_run++;
261 batch_run++; 261 batch_run++;
262 262
263 if (bio_sync(cur)) 263 if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
264 num_sync_run++; 264 num_sync_run++;
265 265
266 if (need_resched()) { 266 if (need_resched()) {
@@ -2903,7 +2903,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
2903 bio->bi_rw |= rw; 2903 bio->bi_rw |= rw;
2904 2904
2905 spin_lock(&device->io_lock); 2905 spin_lock(&device->io_lock);
2906 if (bio_sync(bio)) 2906 if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
2907 pending_bios = &device->pending_sync_bios; 2907 pending_bios = &device->pending_sync_bios;
2908 else 2908 else
2909 pending_bios = &device->pending_bios; 2909 pending_bios = &device->pending_bios;
diff --git a/fs/buffer.c b/fs/buffer.c
index 28f320fac4d4..90a98865b0cc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -281,7 +281,7 @@ static void free_more_memory(void)
281 struct zone *zone; 281 struct zone *zone;
282 int nid; 282 int nid;
283 283
284 wakeup_pdflush(1024); 284 wakeup_flusher_threads(1024);
285 yield(); 285 yield();
286 286
287 for_each_online_node(nid) { 287 for_each_online_node(nid) {
diff --git a/fs/char_dev.c b/fs/char_dev.c
index a173551e19d7..3cbc57f932d2 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -31,6 +31,7 @@
31 * - no readahead or I/O queue unplugging required 31 * - no readahead or I/O queue unplugging required
32 */ 32 */
33struct backing_dev_info directly_mappable_cdev_bdi = { 33struct backing_dev_info directly_mappable_cdev_bdi = {
34 .name = "char",
34 .capabilities = ( 35 .capabilities = (
35#ifdef CONFIG_MMU 36#ifdef CONFIG_MMU
36 /* permit private copies of the data to be taken */ 37 /* permit private copies of the data to be taken */
@@ -237,8 +238,10 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
237} 238}
238 239
239/** 240/**
240 * register_chrdev() - Register a major number for character devices. 241 * __register_chrdev() - create and register a cdev occupying a range of minors
241 * @major: major device number or 0 for dynamic allocation 242 * @major: major device number or 0 for dynamic allocation
243 * @baseminor: first of the requested range of minor numbers
244 * @count: the number of minor numbers required
242 * @name: name of this range of devices 245 * @name: name of this range of devices
243 * @fops: file operations associated with this devices 246 * @fops: file operations associated with this devices
244 * 247 *
@@ -254,19 +257,17 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
254 * /dev. It only helps to keep track of the different owners of devices. If 257 * /dev. It only helps to keep track of the different owners of devices. If
255 * your module name has only one type of devices it's ok to use e.g. the name 258 * your module name has only one type of devices it's ok to use e.g. the name
256 * of the module here. 259 * of the module here.
257 *
258 * This function registers a range of 256 minor numbers. The first minor number
259 * is 0.
260 */ 260 */
261int register_chrdev(unsigned int major, const char *name, 261int __register_chrdev(unsigned int major, unsigned int baseminor,
262 const struct file_operations *fops) 262 unsigned int count, const char *name,
263 const struct file_operations *fops)
263{ 264{
264 struct char_device_struct *cd; 265 struct char_device_struct *cd;
265 struct cdev *cdev; 266 struct cdev *cdev;
266 char *s; 267 char *s;
267 int err = -ENOMEM; 268 int err = -ENOMEM;
268 269
269 cd = __register_chrdev_region(major, 0, 256, name); 270 cd = __register_chrdev_region(major, baseminor, count, name);
270 if (IS_ERR(cd)) 271 if (IS_ERR(cd))
271 return PTR_ERR(cd); 272 return PTR_ERR(cd);
272 273
@@ -280,7 +281,7 @@ int register_chrdev(unsigned int major, const char *name,
280 for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) 281 for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/'))
281 *s = '!'; 282 *s = '!';
282 283
283 err = cdev_add(cdev, MKDEV(cd->major, 0), 256); 284 err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
284 if (err) 285 if (err)
285 goto out; 286 goto out;
286 287
@@ -290,7 +291,7 @@ int register_chrdev(unsigned int major, const char *name,
290out: 291out:
291 kobject_put(&cdev->kobj); 292 kobject_put(&cdev->kobj);
292out2: 293out2:
293 kfree(__unregister_chrdev_region(cd->major, 0, 256)); 294 kfree(__unregister_chrdev_region(cd->major, baseminor, count));
294 return err; 295 return err;
295} 296}
296 297
@@ -316,10 +317,23 @@ void unregister_chrdev_region(dev_t from, unsigned count)
316 } 317 }
317} 318}
318 319
319void unregister_chrdev(unsigned int major, const char *name) 320/**
321 * __unregister_chrdev - unregister and destroy a cdev
322 * @major: major device number
323 * @baseminor: first of the range of minor numbers
324 * @count: the number of minor numbers this cdev is occupying
325 * @name: name of this range of devices
326 *
327 * Unregister and destroy the cdev occupying the region described by
328 * @major, @baseminor and @count. This function undoes what
329 * __register_chrdev() did.
330 */
331void __unregister_chrdev(unsigned int major, unsigned int baseminor,
332 unsigned int count, const char *name)
320{ 333{
321 struct char_device_struct *cd; 334 struct char_device_struct *cd;
322 cd = __unregister_chrdev_region(major, 0, 256); 335
336 cd = __unregister_chrdev_region(major, baseminor, count);
323 if (cd && cd->cdev) 337 if (cd && cd->cdev)
324 cdev_del(cd->cdev); 338 cdev_del(cd->cdev);
325 kfree(cd); 339 kfree(cd);
@@ -568,6 +582,6 @@ EXPORT_SYMBOL(cdev_alloc);
568EXPORT_SYMBOL(cdev_del); 582EXPORT_SYMBOL(cdev_del);
569EXPORT_SYMBOL(cdev_add); 583EXPORT_SYMBOL(cdev_add);
570EXPORT_SYMBOL(cdev_index); 584EXPORT_SYMBOL(cdev_index);
571EXPORT_SYMBOL(register_chrdev); 585EXPORT_SYMBOL(__register_chrdev);
572EXPORT_SYMBOL(unregister_chrdev); 586EXPORT_SYMBOL(__unregister_chrdev);
573EXPORT_SYMBOL(directly_mappable_cdev_bdi); 587EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index e85b1e4389e0..145540a316ab 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -3,7 +3,10 @@ Version 1.60
3Fix memory leak in reconnect. Fix oops in DFS mount error path. 3Fix memory leak in reconnect. Fix oops in DFS mount error path.
4Set s_maxbytes to smaller (the max that vfs can handle) so that 4Set s_maxbytes to smaller (the max that vfs can handle) so that
5sendfile will now work over cifs mounts again. Add noforcegid 5sendfile will now work over cifs mounts again. Add noforcegid
6and noforceuid mount parameters. 6and noforceuid mount parameters. Fix small mem leak when using
7ntlmv2. Fix 2nd mount to same server but with different port to
8be allowed (rather than reusing the 1st port) - only when the
9user explicitly overrides the port on the 2nd mount.
7 10
8Version 1.59 11Version 1.59
9------------ 12------------
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 051caecf7d67..8ec7736ce954 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -125,7 +125,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
125 if (server->addr.sockAddr.sin_family == AF_INET) 125 if (server->addr.sockAddr.sin_family == AF_INET)
126 sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr); 126 sprintf(dp, "ip4=%pI4", &server->addr.sockAddr.sin_addr);
127 else if (server->addr.sockAddr.sin_family == AF_INET6) 127 else if (server->addr.sockAddr.sin_family == AF_INET6)
128 sprintf(dp, "ip6=%pi6", &server->addr.sockAddr6.sin6_addr); 128 sprintf(dp, "ip6=%pI6", &server->addr.sockAddr6.sin6_addr);
129 else 129 else
130 goto out; 130 goto out;
131 131
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 6941c22398a6..7dfe0842a6f6 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -607,7 +607,7 @@ static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
607 return get_cifs_acl_by_path(cifs_sb, path, pacllen); 607 return get_cifs_acl_by_path(cifs_sb, path, pacllen);
608 608
609 pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen); 609 pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen);
610 atomic_dec(&open_file->wrtPending); 610 cifsFileInfo_put(open_file);
611 return pntsd; 611 return pntsd;
612} 612}
613 613
@@ -665,7 +665,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
665 return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); 665 return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen);
666 666
667 rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen); 667 rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen);
668 atomic_dec(&open_file->wrtPending); 668 cifsFileInfo_put(open_file);
669 return rc; 669 return rc;
670} 670}
671 671
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7c9809523f42..7efe1745494d 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -373,6 +373,7 @@ calc_exit_2:
373 compare with the NTLM example */ 373 compare with the NTLM example */
374 hmac_md5_final(ses->server->ntlmv2_hash, pctxt); 374 hmac_md5_final(ses->server->ntlmv2_hash, pctxt);
375 375
376 kfree(pctxt);
376 return rc; 377 return rc;
377} 378}
378 379
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 84b75253b05a..3610e9958b4c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -361,13 +361,10 @@ cifs_show_address(struct seq_file *s, struct TCP_Server_Info *server)
361static int 361static int
362cifs_show_options(struct seq_file *s, struct vfsmount *m) 362cifs_show_options(struct seq_file *s, struct vfsmount *m)
363{ 363{
364 struct cifs_sb_info *cifs_sb; 364 struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb);
365 struct cifsTconInfo *tcon; 365 struct cifsTconInfo *tcon = cifs_sb->tcon;
366
367 cifs_sb = CIFS_SB(m->mnt_sb);
368 tcon = cifs_sb->tcon;
369 366
370 seq_printf(s, ",unc=%s", cifs_sb->tcon->treeName); 367 seq_printf(s, ",unc=%s", tcon->treeName);
371 if (tcon->ses->userName) 368 if (tcon->ses->userName)
372 seq_printf(s, ",username=%s", tcon->ses->userName); 369 seq_printf(s, ",username=%s", tcon->ses->userName);
373 if (tcon->ses->domainName) 370 if (tcon->ses->domainName)
@@ -989,19 +986,19 @@ static int cifs_oplock_thread(void *dummyarg)
989 if (try_to_freeze()) 986 if (try_to_freeze())
990 continue; 987 continue;
991 988
992 spin_lock(&GlobalMid_Lock); 989 spin_lock(&cifs_oplock_lock);
993 if (list_empty(&GlobalOplock_Q)) { 990 if (list_empty(&cifs_oplock_list)) {
994 spin_unlock(&GlobalMid_Lock); 991 spin_unlock(&cifs_oplock_lock);
995 set_current_state(TASK_INTERRUPTIBLE); 992 set_current_state(TASK_INTERRUPTIBLE);
996 schedule_timeout(39*HZ); 993 schedule_timeout(39*HZ);
997 } else { 994 } else {
998 oplock_item = list_entry(GlobalOplock_Q.next, 995 oplock_item = list_entry(cifs_oplock_list.next,
999 struct oplock_q_entry, qhead); 996 struct oplock_q_entry, qhead);
1000 cFYI(1, ("found oplock item to write out")); 997 cFYI(1, ("found oplock item to write out"));
1001 pTcon = oplock_item->tcon; 998 pTcon = oplock_item->tcon;
1002 inode = oplock_item->pinode; 999 inode = oplock_item->pinode;
1003 netfid = oplock_item->netfid; 1000 netfid = oplock_item->netfid;
1004 spin_unlock(&GlobalMid_Lock); 1001 spin_unlock(&cifs_oplock_lock);
1005 DeleteOplockQEntry(oplock_item); 1002 DeleteOplockQEntry(oplock_item);
1006 /* can not grab inode sem here since it would 1003 /* can not grab inode sem here since it would
1007 deadlock when oplock received on delete 1004 deadlock when oplock received on delete
@@ -1058,7 +1055,7 @@ init_cifs(void)
1058 int rc = 0; 1055 int rc = 0;
1059 cifs_proc_init(); 1056 cifs_proc_init();
1060 INIT_LIST_HEAD(&cifs_tcp_ses_list); 1057 INIT_LIST_HEAD(&cifs_tcp_ses_list);
1061 INIT_LIST_HEAD(&GlobalOplock_Q); 1058 INIT_LIST_HEAD(&cifs_oplock_list);
1062#ifdef CONFIG_CIFS_EXPERIMENTAL 1059#ifdef CONFIG_CIFS_EXPERIMENTAL
1063 INIT_LIST_HEAD(&GlobalDnotifyReqList); 1060 INIT_LIST_HEAD(&GlobalDnotifyReqList);
1064 INIT_LIST_HEAD(&GlobalDnotifyRsp_Q); 1061 INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
@@ -1087,6 +1084,7 @@ init_cifs(void)
1087 rwlock_init(&GlobalSMBSeslock); 1084 rwlock_init(&GlobalSMBSeslock);
1088 rwlock_init(&cifs_tcp_ses_lock); 1085 rwlock_init(&cifs_tcp_ses_lock);
1089 spin_lock_init(&GlobalMid_Lock); 1086 spin_lock_init(&GlobalMid_Lock);
1087 spin_lock_init(&cifs_oplock_lock);
1090 1088
1091 if (cifs_max_pending < 2) { 1089 if (cifs_max_pending < 2) {
1092 cifs_max_pending = 2; 1090 cifs_max_pending = 2;
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 6c170948300d..094325e3f714 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
113extern const struct export_operations cifs_export_ops; 113extern const struct export_operations cifs_export_ops;
114#endif /* EXPERIMENTAL */ 114#endif /* EXPERIMENTAL */
115 115
116#define CIFS_VERSION "1.60" 116#define CIFS_VERSION "1.61"
117#endif /* _CIFSFS_H */ 117#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 6084d6379c03..6cfc81a32703 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -351,11 +351,24 @@ struct cifsFileInfo {
351 bool closePend:1; /* file is marked to close */ 351 bool closePend:1; /* file is marked to close */
352 bool invalidHandle:1; /* file closed via session abend */ 352 bool invalidHandle:1; /* file closed via session abend */
353 bool messageMode:1; /* for pipes: message vs byte mode */ 353 bool messageMode:1; /* for pipes: message vs byte mode */
354 atomic_t wrtPending; /* handle in use - defer close */ 354 atomic_t count; /* reference count */
355 struct mutex fh_mutex; /* prevents reopen race after dead ses*/ 355 struct mutex fh_mutex; /* prevents reopen race after dead ses*/
356 struct cifs_search_info srch_inf; 356 struct cifs_search_info srch_inf;
357}; 357};
358 358
359/* Take a reference on the file private data */
360static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
361{
362 atomic_inc(&cifs_file->count);
363}
364
365/* Release a reference on the file private data */
366static inline void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
367{
368 if (atomic_dec_and_test(&cifs_file->count))
369 kfree(cifs_file);
370}
371
359/* 372/*
360 * One of these for each file inode 373 * One of these for each file inode
361 */ 374 */
@@ -656,7 +669,11 @@ GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock;
656 */ 669 */
657GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; 670GLOBAL_EXTERN rwlock_t GlobalSMBSeslock;
658 671
659GLOBAL_EXTERN struct list_head GlobalOplock_Q; 672/* Global list of oplocks */
673GLOBAL_EXTERN struct list_head cifs_oplock_list;
674
675/* Protects the cifs_oplock_list */
676GLOBAL_EXTERN spinlock_t cifs_oplock_lock;
660 677
661/* Outstanding dir notify requests */ 678/* Outstanding dir notify requests */
662GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; 679GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 1866bc2927d4..301e307e1279 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -100,110 +100,138 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
100 to this tcon */ 100 to this tcon */
101} 101}
102 102
103/* Allocate and return pointer to an SMB request buffer, and set basic 103/* reconnect the socket, tcon, and smb session if needed */
104 SMB information in the SMB header. If the return code is zero, this
105 function must have filled in request_buf pointer */
106static int 104static int
107small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, 105cifs_reconnect_tcon(struct cifsTconInfo *tcon, int smb_command)
108 void **request_buf)
109{ 106{
110 int rc = 0; 107 int rc = 0;
108 struct cifsSesInfo *ses;
109 struct TCP_Server_Info *server;
110 struct nls_table *nls_codepage;
111 111
112 /* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so 112 /*
113 check for tcp and smb session status done differently 113 * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for
114 for those three - in the calling routine */ 114 * tcp and smb session status done differently for those three - in the
115 if (tcon) { 115 * calling routine
116 if (tcon->tidStatus == CifsExiting) { 116 */
117 /* only tree disconnect, open, and write, 117 if (!tcon)
118 (and ulogoff which does not have tcon) 118 return 0;
119 are allowed as we start force umount */ 119
120 if ((smb_command != SMB_COM_WRITE_ANDX) && 120 ses = tcon->ses;
121 (smb_command != SMB_COM_OPEN_ANDX) && 121 server = ses->server;
122 (smb_command != SMB_COM_TREE_DISCONNECT)) { 122
123 cFYI(1, ("can not send cmd %d while umounting", 123 /*
124 smb_command)); 124 * only tree disconnect, open, and write, (and ulogoff which does not
125 return -ENODEV; 125 * have tcon) are allowed as we start force umount
126 } 126 */
127 if (tcon->tidStatus == CifsExiting) {
128 if (smb_command != SMB_COM_WRITE_ANDX &&
129 smb_command != SMB_COM_OPEN_ANDX &&
130 smb_command != SMB_COM_TREE_DISCONNECT) {
131 cFYI(1, ("can not send cmd %d while umounting",
132 smb_command));
133 return -ENODEV;
127 } 134 }
128 if ((tcon->ses) && (tcon->ses->status != CifsExiting) && 135 }
129 (tcon->ses->server)) {
130 struct nls_table *nls_codepage;
131 /* Give Demultiplex thread up to 10 seconds to
132 reconnect, should be greater than cifs socket
133 timeout which is 7 seconds */
134 while (tcon->ses->server->tcpStatus ==
135 CifsNeedReconnect) {
136 wait_event_interruptible_timeout(tcon->ses->server->response_q,
137 (tcon->ses->server->tcpStatus ==
138 CifsGood), 10 * HZ);
139 if (tcon->ses->server->tcpStatus ==
140 CifsNeedReconnect) {
141 /* on "soft" mounts we wait once */
142 if (!tcon->retry ||
143 (tcon->ses->status == CifsExiting)) {
144 cFYI(1, ("gave up waiting on "
145 "reconnect in smb_init"));
146 return -EHOSTDOWN;
147 } /* else "hard" mount - keep retrying
148 until process is killed or server
149 comes back on-line */
150 } else /* TCP session is reestablished now */
151 break;
152 }
153 136
154 nls_codepage = load_nls_default(); 137 if (ses->status == CifsExiting)
155 /* need to prevent multiple threads trying to 138 return -EIO;
156 simultaneously reconnect the same SMB session */
157 down(&tcon->ses->sesSem);
158 if (tcon->ses->need_reconnect)
159 rc = cifs_setup_session(0, tcon->ses,
160 nls_codepage);
161 if (!rc && (tcon->need_reconnect)) {
162 mark_open_files_invalid(tcon);
163 rc = CIFSTCon(0, tcon->ses, tcon->treeName,
164 tcon, nls_codepage);
165 up(&tcon->ses->sesSem);
166 /* BB FIXME add code to check if wsize needs
167 update due to negotiated smb buffer size
168 shrinking */
169 if (rc == 0) {
170 atomic_inc(&tconInfoReconnectCount);
171 /* tell server Unix caps we support */
172 if (tcon->ses->capabilities & CAP_UNIX)
173 reset_cifs_unix_caps(
174 0 /* no xid */,
175 tcon,
176 NULL /* we do not know sb */,
177 NULL /* no vol info */);
178 }
179 139
180 cFYI(1, ("reconnect tcon rc = %d", rc)); 140 /*
181 /* Removed call to reopen open files here. 141 * Give demultiplex thread up to 10 seconds to reconnect, should be
182 It is safer (and faster) to reopen files 142 * greater than cifs socket timeout which is 7 seconds
183 one at a time as needed in read and write */ 143 */
184 144 while (server->tcpStatus == CifsNeedReconnect) {
185 /* Check if handle based operation so we 145 wait_event_interruptible_timeout(server->response_q,
186 know whether we can continue or not without 146 (server->tcpStatus == CifsGood), 10 * HZ);
187 returning to caller to reset file handle */
188 switch (smb_command) {
189 case SMB_COM_READ_ANDX:
190 case SMB_COM_WRITE_ANDX:
191 case SMB_COM_CLOSE:
192 case SMB_COM_FIND_CLOSE2:
193 case SMB_COM_LOCKING_ANDX: {
194 unload_nls(nls_codepage);
195 return -EAGAIN;
196 }
197 }
198 } else {
199 up(&tcon->ses->sesSem);
200 }
201 unload_nls(nls_codepage);
202 147
203 } else { 148 /* is TCP session is reestablished now ?*/
204 return -EIO; 149 if (server->tcpStatus != CifsNeedReconnect)
150 break;
151
152 /*
153 * on "soft" mounts we wait once. Hard mounts keep
154 * retrying until process is killed or server comes
155 * back on-line
156 */
157 if (!tcon->retry || ses->status == CifsExiting) {
158 cFYI(1, ("gave up waiting on reconnect in smb_init"));
159 return -EHOSTDOWN;
205 } 160 }
206 } 161 }
162
163 if (!ses->need_reconnect && !tcon->need_reconnect)
164 return 0;
165
166 nls_codepage = load_nls_default();
167
168 /*
169 * need to prevent multiple threads trying to simultaneously
170 * reconnect the same SMB session
171 */
172 down(&ses->sesSem);
173 if (ses->need_reconnect)
174 rc = cifs_setup_session(0, ses, nls_codepage);
175
176 /* do we need to reconnect tcon? */
177 if (rc || !tcon->need_reconnect) {
178 up(&ses->sesSem);
179 goto out;
180 }
181
182 mark_open_files_invalid(tcon);
183 rc = CIFSTCon(0, ses, tcon->treeName, tcon, nls_codepage);
184 up(&ses->sesSem);
185 cFYI(1, ("reconnect tcon rc = %d", rc));
186
187 if (rc)
188 goto out;
189
190 /*
191 * FIXME: check if wsize needs updated due to negotiated smb buffer
192 * size shrinking
193 */
194 atomic_inc(&tconInfoReconnectCount);
195
196 /* tell server Unix caps we support */
197 if (ses->capabilities & CAP_UNIX)
198 reset_cifs_unix_caps(0, tcon, NULL, NULL);
199
200 /*
201 * Removed call to reopen open files here. It is safer (and faster) to
202 * reopen files one at a time as needed in read and write.
203 *
204 * FIXME: what about file locks? don't we need to reclaim them ASAP?
205 */
206
207out:
208 /*
209 * Check if handle based operation so we know whether we can continue
210 * or not without returning to caller to reset file handle
211 */
212 switch (smb_command) {
213 case SMB_COM_READ_ANDX:
214 case SMB_COM_WRITE_ANDX:
215 case SMB_COM_CLOSE:
216 case SMB_COM_FIND_CLOSE2:
217 case SMB_COM_LOCKING_ANDX:
218 rc = -EAGAIN;
219 }
220
221 unload_nls(nls_codepage);
222 return rc;
223}
224
225/* Allocate and return pointer to an SMB request buffer, and set basic
226 SMB information in the SMB header. If the return code is zero, this
227 function must have filled in request_buf pointer */
228static int
229small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
230 void **request_buf)
231{
232 int rc = 0;
233
234 rc = cifs_reconnect_tcon(tcon, smb_command);
207 if (rc) 235 if (rc)
208 return rc; 236 return rc;
209 237
@@ -256,101 +284,7 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
256{ 284{
257 int rc = 0; 285 int rc = 0;
258 286
259 /* SMBs NegProt, SessSetup, uLogoff do not have tcon yet so 287 rc = cifs_reconnect_tcon(tcon, smb_command);
260 check for tcp and smb session status done differently
261 for those three - in the calling routine */
262 if (tcon) {
263 if (tcon->tidStatus == CifsExiting) {
264 /* only tree disconnect, open, and write,
265 (and ulogoff which does not have tcon)
266 are allowed as we start force umount */
267 if ((smb_command != SMB_COM_WRITE_ANDX) &&
268 (smb_command != SMB_COM_OPEN_ANDX) &&
269 (smb_command != SMB_COM_TREE_DISCONNECT)) {
270 cFYI(1, ("can not send cmd %d while umounting",
271 smb_command));
272 return -ENODEV;
273 }
274 }
275
276 if ((tcon->ses) && (tcon->ses->status != CifsExiting) &&
277 (tcon->ses->server)) {
278 struct nls_table *nls_codepage;
279 /* Give Demultiplex thread up to 10 seconds to
280 reconnect, should be greater than cifs socket
281 timeout which is 7 seconds */
282 while (tcon->ses->server->tcpStatus ==
283 CifsNeedReconnect) {
284 wait_event_interruptible_timeout(tcon->ses->server->response_q,
285 (tcon->ses->server->tcpStatus ==
286 CifsGood), 10 * HZ);
287 if (tcon->ses->server->tcpStatus ==
288 CifsNeedReconnect) {
289 /* on "soft" mounts we wait once */
290 if (!tcon->retry ||
291 (tcon->ses->status == CifsExiting)) {
292 cFYI(1, ("gave up waiting on "
293 "reconnect in smb_init"));
294 return -EHOSTDOWN;
295 } /* else "hard" mount - keep retrying
296 until process is killed or server
297 comes on-line */
298 } else /* TCP session is reestablished now */
299 break;
300 }
301 nls_codepage = load_nls_default();
302 /* need to prevent multiple threads trying to
303 simultaneously reconnect the same SMB session */
304 down(&tcon->ses->sesSem);
305 if (tcon->ses->need_reconnect)
306 rc = cifs_setup_session(0, tcon->ses,
307 nls_codepage);
308 if (!rc && (tcon->need_reconnect)) {
309 mark_open_files_invalid(tcon);
310 rc = CIFSTCon(0, tcon->ses, tcon->treeName,
311 tcon, nls_codepage);
312 up(&tcon->ses->sesSem);
313 /* BB FIXME add code to check if wsize needs
314 update due to negotiated smb buffer size
315 shrinking */
316 if (rc == 0) {
317 atomic_inc(&tconInfoReconnectCount);
318 /* tell server Unix caps we support */
319 if (tcon->ses->capabilities & CAP_UNIX)
320 reset_cifs_unix_caps(
321 0 /* no xid */,
322 tcon,
323 NULL /* do not know sb */,
324 NULL /* no vol info */);
325 }
326
327 cFYI(1, ("reconnect tcon rc = %d", rc));
328 /* Removed call to reopen open files here.
329 It is safer (and faster) to reopen files
330 one at a time as needed in read and write */
331
332 /* Check if handle based operation so we
333 know whether we can continue or not without
334 returning to caller to reset file handle */
335 switch (smb_command) {
336 case SMB_COM_READ_ANDX:
337 case SMB_COM_WRITE_ANDX:
338 case SMB_COM_CLOSE:
339 case SMB_COM_FIND_CLOSE2:
340 case SMB_COM_LOCKING_ANDX: {
341 unload_nls(nls_codepage);
342 return -EAGAIN;
343 }
344 }
345 } else {
346 up(&tcon->ses->sesSem);
347 }
348 unload_nls(nls_codepage);
349
350 } else {
351 return -EIO;
352 }
353 }
354 if (rc) 288 if (rc)
355 return rc; 289 return rc;
356 290
@@ -3961,6 +3895,10 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
3961 if (is_unicode) { 3895 if (is_unicode) {
3962 __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, 3896 __le16 *tmp = kmalloc(strlen(searchName)*2 + 2,
3963 GFP_KERNEL); 3897 GFP_KERNEL);
3898 if (tmp == NULL) {
3899 rc = -ENOMEM;
3900 goto parse_DFS_referrals_exit;
3901 }
3964 cifsConvertToUCS((__le16 *) tmp, searchName, 3902 cifsConvertToUCS((__le16 *) tmp, searchName,
3965 PATH_MAX, nls_codepage, remap); 3903 PATH_MAX, nls_codepage, remap);
3966 node->path_consumed = cifs_ucs2_bytes(tmp, 3904 node->path_consumed = cifs_ucs2_bytes(tmp,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 1f3345d7fa79..d49682433c20 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1377,7 +1377,7 @@ cifs_parse_mount_options(char *options, const char *devname,
1377} 1377}
1378 1378
1379static struct TCP_Server_Info * 1379static struct TCP_Server_Info *
1380cifs_find_tcp_session(struct sockaddr_storage *addr) 1380cifs_find_tcp_session(struct sockaddr_storage *addr, unsigned short int port)
1381{ 1381{
1382 struct list_head *tmp; 1382 struct list_head *tmp;
1383 struct TCP_Server_Info *server; 1383 struct TCP_Server_Info *server;
@@ -1397,16 +1397,37 @@ cifs_find_tcp_session(struct sockaddr_storage *addr)
1397 if (server->tcpStatus == CifsNew) 1397 if (server->tcpStatus == CifsNew)
1398 continue; 1398 continue;
1399 1399
1400 if (addr->ss_family == AF_INET && 1400 switch (addr->ss_family) {
1401 (addr4->sin_addr.s_addr != 1401 case AF_INET:
1402 server->addr.sockAddr.sin_addr.s_addr)) 1402 if (addr4->sin_addr.s_addr ==
1403 continue; 1403 server->addr.sockAddr.sin_addr.s_addr) {
1404 else if (addr->ss_family == AF_INET6 && 1404 addr4->sin_port = htons(port);
1405 (!ipv6_addr_equal(&server->addr.sockAddr6.sin6_addr, 1405 /* user overrode default port? */
1406 &addr6->sin6_addr) || 1406 if (addr4->sin_port) {
1407 server->addr.sockAddr6.sin6_scope_id != 1407 if (addr4->sin_port !=
1408 addr6->sin6_scope_id)) 1408 server->addr.sockAddr.sin_port)
1409 continue; 1409 continue;
1410 }
1411 break;
1412 } else
1413 continue;
1414
1415 case AF_INET6:
1416 if (ipv6_addr_equal(&addr6->sin6_addr,
1417 &server->addr.sockAddr6.sin6_addr) &&
1418 (addr6->sin6_scope_id ==
1419 server->addr.sockAddr6.sin6_scope_id)) {
1420 addr6->sin6_port = htons(port);
1421 /* user overrode default port? */
1422 if (addr6->sin6_port) {
1423 if (addr6->sin6_port !=
1424 server->addr.sockAddr6.sin6_port)
1425 continue;
1426 }
1427 break;
1428 } else
1429 continue;
1430 }
1410 1431
1411 ++server->srv_count; 1432 ++server->srv_count;
1412 write_unlock(&cifs_tcp_ses_lock); 1433 write_unlock(&cifs_tcp_ses_lock);
@@ -1475,7 +1496,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1475 } 1496 }
1476 1497
1477 /* see if we already have a matching tcp_ses */ 1498 /* see if we already have a matching tcp_ses */
1478 tcp_ses = cifs_find_tcp_session(&addr); 1499 tcp_ses = cifs_find_tcp_session(&addr, volume_info->port);
1479 if (tcp_ses) 1500 if (tcp_ses)
1480 return tcp_ses; 1501 return tcp_ses;
1481 1502
@@ -2636,9 +2657,9 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
2636 return -EIO; 2657 return -EIO;
2637 2658
2638 smb_buffer = cifs_buf_get(); 2659 smb_buffer = cifs_buf_get();
2639 if (smb_buffer == NULL) { 2660 if (smb_buffer == NULL)
2640 return -ENOMEM; 2661 return -ENOMEM;
2641 } 2662
2642 smb_buffer_response = smb_buffer; 2663 smb_buffer_response = smb_buffer;
2643 2664
2644 header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX, 2665 header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX,
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 4326ffd90fa9..a6424cfc0121 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -153,7 +153,7 @@ cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle,
153 mutex_init(&pCifsFile->fh_mutex); 153 mutex_init(&pCifsFile->fh_mutex);
154 mutex_init(&pCifsFile->lock_mutex); 154 mutex_init(&pCifsFile->lock_mutex);
155 INIT_LIST_HEAD(&pCifsFile->llist); 155 INIT_LIST_HEAD(&pCifsFile->llist);
156 atomic_set(&pCifsFile->wrtPending, 0); 156 atomic_set(&pCifsFile->count, 1);
157 157
158 /* set the following in open now 158 /* set the following in open now
159 pCifsFile->pfile = file; */ 159 pCifsFile->pfile = file; */
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c34b7f8a217b..fa7beac8b80e 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -53,11 +53,9 @@ static inline struct cifsFileInfo *cifs_init_private(
53 private_data->pInode = inode; 53 private_data->pInode = inode;
54 private_data->invalidHandle = false; 54 private_data->invalidHandle = false;
55 private_data->closePend = false; 55 private_data->closePend = false;
56 /* we have to track num writers to the inode, since writepages 56 /* Initialize reference count to one. The private data is
57 does not tell us which handle the write is for so there can 57 freed on the release of the last reference */
58 be a close (overlapping with write) of the filehandle that 58 atomic_set(&private_data->count, 1);
59 cifs_writepages chose to use */
60 atomic_set(&private_data->wrtPending, 0);
61 59
62 return private_data; 60 return private_data;
63} 61}
@@ -643,7 +641,7 @@ int cifs_close(struct inode *inode, struct file *file)
643 if (!pTcon->need_reconnect) { 641 if (!pTcon->need_reconnect) {
644 write_unlock(&GlobalSMBSeslock); 642 write_unlock(&GlobalSMBSeslock);
645 timeout = 2; 643 timeout = 2;
646 while ((atomic_read(&pSMBFile->wrtPending) != 0) 644 while ((atomic_read(&pSMBFile->count) != 1)
647 && (timeout <= 2048)) { 645 && (timeout <= 2048)) {
648 /* Give write a better chance to get to 646 /* Give write a better chance to get to
649 server ahead of the close. We do not 647 server ahead of the close. We do not
@@ -657,8 +655,6 @@ int cifs_close(struct inode *inode, struct file *file)
657 msleep(timeout); 655 msleep(timeout);
658 timeout *= 4; 656 timeout *= 4;
659 } 657 }
660 if (atomic_read(&pSMBFile->wrtPending))
661 cERROR(1, ("close with pending write"));
662 if (!pTcon->need_reconnect && 658 if (!pTcon->need_reconnect &&
663 !pSMBFile->invalidHandle) 659 !pSMBFile->invalidHandle)
664 rc = CIFSSMBClose(xid, pTcon, 660 rc = CIFSSMBClose(xid, pTcon,
@@ -681,24 +677,7 @@ int cifs_close(struct inode *inode, struct file *file)
681 list_del(&pSMBFile->flist); 677 list_del(&pSMBFile->flist);
682 list_del(&pSMBFile->tlist); 678 list_del(&pSMBFile->tlist);
683 write_unlock(&GlobalSMBSeslock); 679 write_unlock(&GlobalSMBSeslock);
684 timeout = 10; 680 cifsFileInfo_put(file->private_data);
685 /* We waited above to give the SMBWrite a chance to issue
686 on the wire (so we do not get SMBWrite returning EBADF
687 if writepages is racing with close. Note that writepages
688 does not specify a file handle, so it is possible for a file
689 to be opened twice, and the application close the "wrong"
690 file handle - in these cases we delay long enough to allow
691 the SMBWrite to get on the wire before the SMB Close.
692 We allow total wait here over 45 seconds, more than
693 oplock break time, and more than enough to allow any write
694 to complete on the server, or to time out on the client */
695 while ((atomic_read(&pSMBFile->wrtPending) != 0)
696 && (timeout <= 50000)) {
697 cERROR(1, ("writes pending, delay free of handle"));
698 msleep(timeout);
699 timeout *= 8;
700 }
701 kfree(file->private_data);
702 file->private_data = NULL; 681 file->private_data = NULL;
703 } else 682 } else
704 rc = -EBADF; 683 rc = -EBADF;
@@ -1236,7 +1215,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
1236 if (!open_file->invalidHandle) { 1215 if (!open_file->invalidHandle) {
1237 /* found a good file */ 1216 /* found a good file */
1238 /* lock it so it will not be closed on us */ 1217 /* lock it so it will not be closed on us */
1239 atomic_inc(&open_file->wrtPending); 1218 cifsFileInfo_get(open_file);
1240 read_unlock(&GlobalSMBSeslock); 1219 read_unlock(&GlobalSMBSeslock);
1241 return open_file; 1220 return open_file;
1242 } /* else might as well continue, and look for 1221 } /* else might as well continue, and look for
@@ -1276,7 +1255,7 @@ refind_writable:
1276 if (open_file->pfile && 1255 if (open_file->pfile &&
1277 ((open_file->pfile->f_flags & O_RDWR) || 1256 ((open_file->pfile->f_flags & O_RDWR) ||
1278 (open_file->pfile->f_flags & O_WRONLY))) { 1257 (open_file->pfile->f_flags & O_WRONLY))) {
1279 atomic_inc(&open_file->wrtPending); 1258 cifsFileInfo_get(open_file);
1280 1259
1281 if (!open_file->invalidHandle) { 1260 if (!open_file->invalidHandle) {
1282 /* found a good writable file */ 1261 /* found a good writable file */
@@ -1293,7 +1272,7 @@ refind_writable:
1293 else { /* start over in case this was deleted */ 1272 else { /* start over in case this was deleted */
1294 /* since the list could be modified */ 1273 /* since the list could be modified */
1295 read_lock(&GlobalSMBSeslock); 1274 read_lock(&GlobalSMBSeslock);
1296 atomic_dec(&open_file->wrtPending); 1275 cifsFileInfo_put(open_file);
1297 goto refind_writable; 1276 goto refind_writable;
1298 } 1277 }
1299 } 1278 }
@@ -1309,7 +1288,7 @@ refind_writable:
1309 read_lock(&GlobalSMBSeslock); 1288 read_lock(&GlobalSMBSeslock);
1310 /* can not use this handle, no write 1289 /* can not use this handle, no write
1311 pending on this one after all */ 1290 pending on this one after all */
1312 atomic_dec(&open_file->wrtPending); 1291 cifsFileInfo_put(open_file);
1313 1292
1314 if (open_file->closePend) /* list could have changed */ 1293 if (open_file->closePend) /* list could have changed */
1315 goto refind_writable; 1294 goto refind_writable;
@@ -1373,7 +1352,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1373 if (open_file) { 1352 if (open_file) {
1374 bytes_written = cifs_write(open_file->pfile, write_data, 1353 bytes_written = cifs_write(open_file->pfile, write_data,
1375 to-from, &offset); 1354 to-from, &offset);
1376 atomic_dec(&open_file->wrtPending); 1355 cifsFileInfo_put(open_file);
1377 /* Does mm or vfs already set times? */ 1356 /* Does mm or vfs already set times? */
1378 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb); 1357 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1379 if ((bytes_written > 0) && (offset)) 1358 if ((bytes_written > 0) && (offset))
@@ -1562,7 +1541,7 @@ retry:
1562 bytes_to_write, offset, 1541 bytes_to_write, offset,
1563 &bytes_written, iov, n_iov, 1542 &bytes_written, iov, n_iov,
1564 long_op); 1543 long_op);
1565 atomic_dec(&open_file->wrtPending); 1544 cifsFileInfo_put(open_file);
1566 cifs_update_eof(cifsi, offset, bytes_written); 1545 cifs_update_eof(cifsi, offset, bytes_written);
1567 1546
1568 if (rc || bytes_written < bytes_to_write) { 1547 if (rc || bytes_written < bytes_to_write) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 82d83839655e..1f09c7619319 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -800,7 +800,7 @@ set_via_filehandle:
800 if (open_file == NULL) 800 if (open_file == NULL)
801 CIFSSMBClose(xid, pTcon, netfid); 801 CIFSSMBClose(xid, pTcon, netfid);
802 else 802 else
803 atomic_dec(&open_file->wrtPending); 803 cifsFileInfo_put(open_file);
804out: 804out:
805 return rc; 805 return rc;
806} 806}
@@ -1635,7 +1635,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1635 __u32 npid = open_file->pid; 1635 __u32 npid = open_file->pid;
1636 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, 1636 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
1637 npid, false); 1637 npid, false);
1638 atomic_dec(&open_file->wrtPending); 1638 cifsFileInfo_put(open_file);
1639 cFYI(1, ("SetFSize for attrs rc = %d", rc)); 1639 cFYI(1, ("SetFSize for attrs rc = %d", rc));
1640 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { 1640 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1641 unsigned int bytes_written; 1641 unsigned int bytes_written;
@@ -1790,7 +1790,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1790 u16 nfid = open_file->netfid; 1790 u16 nfid = open_file->netfid;
1791 u32 npid = open_file->pid; 1791 u32 npid = open_file->pid;
1792 rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); 1792 rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid);
1793 atomic_dec(&open_file->wrtPending); 1793 cifsFileInfo_put(open_file);
1794 } else { 1794 } else {
1795 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, 1795 rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args,
1796 cifs_sb->local_nls, 1796 cifs_sb->local_nls,
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 0ad3e2d116a6..1da4ab250eae 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -119,20 +119,19 @@ AllocOplockQEntry(struct inode *pinode, __u16 fid, struct cifsTconInfo *tcon)
119 temp->pinode = pinode; 119 temp->pinode = pinode;
120 temp->tcon = tcon; 120 temp->tcon = tcon;
121 temp->netfid = fid; 121 temp->netfid = fid;
122 spin_lock(&GlobalMid_Lock); 122 spin_lock(&cifs_oplock_lock);
123 list_add_tail(&temp->qhead, &GlobalOplock_Q); 123 list_add_tail(&temp->qhead, &cifs_oplock_list);
124 spin_unlock(&GlobalMid_Lock); 124 spin_unlock(&cifs_oplock_lock);
125 } 125 }
126 return temp; 126 return temp;
127
128} 127}
129 128
130void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry) 129void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry)
131{ 130{
132 spin_lock(&GlobalMid_Lock); 131 spin_lock(&cifs_oplock_lock);
133 /* should we check if list empty first? */ 132 /* should we check if list empty first? */
134 list_del(&oplockEntry->qhead); 133 list_del(&oplockEntry->qhead);
135 spin_unlock(&GlobalMid_Lock); 134 spin_unlock(&cifs_oplock_lock);
136 kmem_cache_free(cifs_oplock_cachep, oplockEntry); 135 kmem_cache_free(cifs_oplock_cachep, oplockEntry);
137} 136}
138 137
@@ -144,14 +143,14 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
144 if (tcon == NULL) 143 if (tcon == NULL)
145 return; 144 return;
146 145
147 spin_lock(&GlobalMid_Lock); 146 spin_lock(&cifs_oplock_lock);
148 list_for_each_entry(temp, &GlobalOplock_Q, qhead) { 147 list_for_each_entry(temp, &cifs_oplock_list, qhead) {
149 if ((temp->tcon) && (temp->tcon == tcon)) { 148 if ((temp->tcon) && (temp->tcon == tcon)) {
150 list_del(&temp->qhead); 149 list_del(&temp->qhead);
151 kmem_cache_free(cifs_oplock_cachep, temp); 150 kmem_cache_free(cifs_oplock_cachep, temp);
152 } 151 }
153 } 152 }
154 spin_unlock(&GlobalMid_Lock); 153 spin_unlock(&cifs_oplock_lock);
155} 154}
156 155
157static int 156static int
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4921e7426d95..a2f746066c5d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -51,6 +51,7 @@ static const struct address_space_operations configfs_aops = {
51}; 51};
52 52
53static struct backing_dev_info configfs_backing_dev_info = { 53static struct backing_dev_info configfs_backing_dev_info = {
54 .name = "configfs",
54 .ra_pages = 0, /* No readahead */ 55 .ra_pages = 0, /* No readahead */
55 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 56 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
56}; 57};
diff --git a/fs/dcache.c b/fs/dcache.c
index 9e5cd3c3a6ba..a100fa35a48f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -32,6 +32,7 @@
32#include <linux/swap.h> 32#include <linux/swap.h>
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/fs_struct.h> 34#include <linux/fs_struct.h>
35#include <linux/hardirq.h>
35#include "internal.h" 36#include "internal.h"
36 37
37int sysctl_vfs_cache_pressure __read_mostly = 100; 38int sysctl_vfs_cache_pressure __read_mostly = 100;
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
index ccc9d62c462d..55ea369f43a9 100644
--- a/fs/dlm/netlink.c
+++ b/fs/dlm/netlink.c
@@ -63,7 +63,7 @@ static int send_data(struct sk_buff *skb)
63 return rv; 63 return rv;
64 } 64 }
65 65
66 return genlmsg_unicast(skb, listener_nlpid); 66 return genlmsg_unicast(&init_net, skb, listener_nlpid);
67} 67}
68 68
69static int user_cmd(struct sk_buff *skb, struct genl_info *info) 69static int user_cmd(struct sk_buff *skb, struct genl_info *info)
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d636e1297cad..a63d44256a70 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -230,7 +230,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
230 return error; 230 return error;
231} 231}
232 232
233static int 233int
234ext2_check_acl(struct inode *inode, int mask) 234ext2_check_acl(struct inode *inode, int mask)
235{ 235{
236 struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); 236 struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
@@ -246,12 +246,6 @@ ext2_check_acl(struct inode *inode, int mask)
246 return -EAGAIN; 246 return -EAGAIN;
247} 247}
248 248
249int
250ext2_permission(struct inode *inode, int mask)
251{
252 return generic_permission(inode, mask, ext2_check_acl);
253}
254
255/* 249/*
256 * Initialize the ACLs of a new inode. Called from ext2_new_inode. 250 * Initialize the ACLs of a new inode. Called from ext2_new_inode.
257 * 251 *
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index ecefe478898f..3ff6cbb9ac44 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,13 +54,13 @@ static inline int ext2_acl_count(size_t size)
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext2_permission (struct inode *, int); 57extern int ext2_check_acl (struct inode *, int);
58extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
59extern int ext2_init_acl (struct inode *, struct inode *); 59extern int ext2_init_acl (struct inode *, struct inode *);
60 60
61#else 61#else
62#include <linux/sched.h> 62#include <linux/sched.h>
63#define ext2_permission NULL 63#define ext2_check_acl NULL
64#define ext2_get_acl NULL 64#define ext2_get_acl NULL
65#define ext2_set_acl NULL 65#define ext2_set_acl NULL
66 66
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2b9e47dc9222..a2f3afd1a1c1 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -85,6 +85,6 @@ const struct inode_operations ext2_file_inode_operations = {
85 .removexattr = generic_removexattr, 85 .removexattr = generic_removexattr,
86#endif 86#endif
87 .setattr = ext2_setattr, 87 .setattr = ext2_setattr,
88 .permission = ext2_permission, 88 .check_acl = ext2_check_acl,
89 .fiemap = ext2_fiemap, 89 .fiemap = ext2_fiemap,
90}; 90};
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index e27130341d4f..1c1638f873a4 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -482,7 +482,7 @@ static int ext2_alloc_branch(struct inode *inode,
482 unlock_buffer(bh); 482 unlock_buffer(bh);
483 mark_buffer_dirty_inode(bh, inode); 483 mark_buffer_dirty_inode(bh, inode);
484 /* We used to sync bh here if IS_SYNC(inode). 484 /* We used to sync bh here if IS_SYNC(inode).
485 * But we now rely upon generic_osync_inode() 485 * But we now rely upon generic_write_sync()
486 * and b_inode_buffers. But not for directories. 486 * and b_inode_buffers. But not for directories.
487 */ 487 */
488 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) 488 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode))
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 78d9b925fc94..23701f289e98 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -400,7 +400,7 @@ const struct inode_operations ext2_dir_inode_operations = {
400 .removexattr = generic_removexattr, 400 .removexattr = generic_removexattr,
401#endif 401#endif
402 .setattr = ext2_setattr, 402 .setattr = ext2_setattr,
403 .permission = ext2_permission, 403 .check_acl = ext2_check_acl,
404}; 404};
405 405
406const struct inode_operations ext2_special_inode_operations = { 406const struct inode_operations ext2_special_inode_operations = {
@@ -411,5 +411,5 @@ const struct inode_operations ext2_special_inode_operations = {
411 .removexattr = generic_removexattr, 411 .removexattr = generic_removexattr,
412#endif 412#endif
413 .setattr = ext2_setattr, 413 .setattr = ext2_setattr,
414 .permission = ext2_permission, 414 .check_acl = ext2_check_acl,
415}; 415};
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e167bae37ef0..c9b0df376b5f 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -238,7 +238,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
238 return error; 238 return error;
239} 239}
240 240
241static int 241int
242ext3_check_acl(struct inode *inode, int mask) 242ext3_check_acl(struct inode *inode, int mask)
243{ 243{
244 struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); 244 struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
@@ -254,12 +254,6 @@ ext3_check_acl(struct inode *inode, int mask)
254 return -EAGAIN; 254 return -EAGAIN;
255} 255}
256 256
257int
258ext3_permission(struct inode *inode, int mask)
259{
260 return generic_permission(inode, mask, ext3_check_acl);
261}
262
263/* 257/*
264 * Initialize the ACLs of a new inode. Called from ext3_new_inode. 258 * Initialize the ACLs of a new inode. Called from ext3_new_inode.
265 * 259 *
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 07d15a3a5969..597334626de9 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size)
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext3_permission (struct inode *, int); 57extern int ext3_check_acl (struct inode *, int);
58extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
60 60
61#else /* CONFIG_EXT3_FS_POSIX_ACL */ 61#else /* CONFIG_EXT3_FS_POSIX_ACL */
62#include <linux/sched.h> 62#include <linux/sched.h>
63#define ext3_permission NULL 63#define ext3_check_acl NULL
64 64
65static inline int 65static inline int
66ext3_acl_chmod(struct inode *inode) 66ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5b49704b231b..388bbdfa0b4e 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -51,71 +51,12 @@ static int ext3_release_file (struct inode * inode, struct file * filp)
51 return 0; 51 return 0;
52} 52}
53 53
54static ssize_t
55ext3_file_write(struct kiocb *iocb, const struct iovec *iov,
56 unsigned long nr_segs, loff_t pos)
57{
58 struct file *file = iocb->ki_filp;
59 struct inode *inode = file->f_path.dentry->d_inode;
60 ssize_t ret;
61 int err;
62
63 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
64
65 /*
66 * Skip flushing if there was an error, or if nothing was written.
67 */
68 if (ret <= 0)
69 return ret;
70
71 /*
72 * If the inode is IS_SYNC, or is O_SYNC and we are doing data
73 * journalling then we need to make sure that we force the transaction
74 * to disk to keep all metadata uptodate synchronously.
75 */
76 if (file->f_flags & O_SYNC) {
77 /*
78 * If we are non-data-journaled, then the dirty data has
79 * already been flushed to backing store by generic_osync_inode,
80 * and the inode has been flushed too if there have been any
81 * modifications other than mere timestamp updates.
82 *
83 * Open question --- do we care about flushing timestamps too
84 * if the inode is IS_SYNC?
85 */
86 if (!ext3_should_journal_data(inode))
87 return ret;
88
89 goto force_commit;
90 }
91
92 /*
93 * So we know that there has been no forced data flush. If the inode
94 * is marked IS_SYNC, we need to force one ourselves.
95 */
96 if (!IS_SYNC(inode))
97 return ret;
98
99 /*
100 * Open question #2 --- should we force data to disk here too? If we
101 * don't, the only impact is that data=writeback filesystems won't
102 * flush data to disk automatically on IS_SYNC, only metadata (but
103 * historically, that is what ext2 has done.)
104 */
105
106force_commit:
107 err = ext3_force_commit(inode->i_sb);
108 if (err)
109 return err;
110 return ret;
111}
112
113const struct file_operations ext3_file_operations = { 54const struct file_operations ext3_file_operations = {
114 .llseek = generic_file_llseek, 55 .llseek = generic_file_llseek,
115 .read = do_sync_read, 56 .read = do_sync_read,
116 .write = do_sync_write, 57 .write = do_sync_write,
117 .aio_read = generic_file_aio_read, 58 .aio_read = generic_file_aio_read,
118 .aio_write = ext3_file_write, 59 .aio_write = generic_file_aio_write,
119 .unlocked_ioctl = ext3_ioctl, 60 .unlocked_ioctl = ext3_ioctl,
120#ifdef CONFIG_COMPAT 61#ifdef CONFIG_COMPAT
121 .compat_ioctl = ext3_compat_ioctl, 62 .compat_ioctl = ext3_compat_ioctl,
@@ -137,7 +78,7 @@ const struct inode_operations ext3_file_inode_operations = {
137 .listxattr = ext3_listxattr, 78 .listxattr = ext3_listxattr,
138 .removexattr = generic_removexattr, 79 .removexattr = generic_removexattr,
139#endif 80#endif
140 .permission = ext3_permission, 81 .check_acl = ext3_check_acl,
141 .fiemap = ext3_fiemap, 82 .fiemap = ext3_fiemap,
142}; 83};
143 84
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 6ff7b9730234..aad6400c9b77 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2445,7 +2445,7 @@ const struct inode_operations ext3_dir_inode_operations = {
2445 .listxattr = ext3_listxattr, 2445 .listxattr = ext3_listxattr,
2446 .removexattr = generic_removexattr, 2446 .removexattr = generic_removexattr,
2447#endif 2447#endif
2448 .permission = ext3_permission, 2448 .check_acl = ext3_check_acl,
2449}; 2449};
2450 2450
2451const struct inode_operations ext3_special_inode_operations = { 2451const struct inode_operations ext3_special_inode_operations = {
@@ -2456,5 +2456,5 @@ const struct inode_operations ext3_special_inode_operations = {
2456 .listxattr = ext3_listxattr, 2456 .listxattr = ext3_listxattr,
2457 .removexattr = generic_removexattr, 2457 .removexattr = generic_removexattr,
2458#endif 2458#endif
2459 .permission = ext3_permission, 2459 .check_acl = ext3_check_acl,
2460}; 2460};
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index f6d8967149ca..0df88b2a69b0 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -236,7 +236,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
236 return error; 236 return error;
237} 237}
238 238
239static int 239int
240ext4_check_acl(struct inode *inode, int mask) 240ext4_check_acl(struct inode *inode, int mask)
241{ 241{
242 struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); 242 struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
@@ -252,12 +252,6 @@ ext4_check_acl(struct inode *inode, int mask)
252 return -EAGAIN; 252 return -EAGAIN;
253} 253}
254 254
255int
256ext4_permission(struct inode *inode, int mask)
257{
258 return generic_permission(inode, mask, ext4_check_acl);
259}
260
261/* 255/*
262 * Initialize the ACLs of a new inode. Called from ext4_new_inode. 256 * Initialize the ACLs of a new inode. Called from ext4_new_inode.
263 * 257 *
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 949789d2bba6..9d843d5deac4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,13 +54,13 @@ static inline int ext4_acl_count(size_t size)
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext4_permission(struct inode *, int); 57extern int ext4_check_acl(struct inode *, int);
58extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); 59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
60 60
61#else /* CONFIG_EXT4_FS_POSIX_ACL */ 61#else /* CONFIG_EXT4_FS_POSIX_ACL */
62#include <linux/sched.h> 62#include <linux/sched.h>
63#define ext4_permission NULL 63#define ext4_check_acl NULL
64 64
65static inline int 65static inline int
66ext4_acl_chmod(struct inode *inode) 66ext4_acl_chmod(struct inode *inode)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3f1873fef1c6..5ca3eca70a1e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -58,10 +58,7 @@ static ssize_t
58ext4_file_write(struct kiocb *iocb, const struct iovec *iov, 58ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
59 unsigned long nr_segs, loff_t pos) 59 unsigned long nr_segs, loff_t pos)
60{ 60{
61 struct file *file = iocb->ki_filp; 61 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
62 struct inode *inode = file->f_path.dentry->d_inode;
63 ssize_t ret;
64 int err;
65 62
66 /* 63 /*
67 * If we have encountered a bitmap-format file, the size limit 64 * If we have encountered a bitmap-format file, the size limit
@@ -81,53 +78,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
81 } 78 }
82 } 79 }
83 80
84 ret = generic_file_aio_write(iocb, iov, nr_segs, pos); 81 return generic_file_aio_write(iocb, iov, nr_segs, pos);
85 /*
86 * Skip flushing if there was an error, or if nothing was written.
87 */
88 if (ret <= 0)
89 return ret;
90
91 /*
92 * If the inode is IS_SYNC, or is O_SYNC and we are doing data
93 * journalling then we need to make sure that we force the transaction
94 * to disk to keep all metadata uptodate synchronously.
95 */
96 if (file->f_flags & O_SYNC) {
97 /*
98 * If we are non-data-journaled, then the dirty data has
99 * already been flushed to backing store by generic_osync_inode,
100 * and the inode has been flushed too if there have been any
101 * modifications other than mere timestamp updates.
102 *
103 * Open question --- do we care about flushing timestamps too
104 * if the inode is IS_SYNC?
105 */
106 if (!ext4_should_journal_data(inode))
107 return ret;
108
109 goto force_commit;
110 }
111
112 /*
113 * So we know that there has been no forced data flush. If the inode
114 * is marked IS_SYNC, we need to force one ourselves.
115 */
116 if (!IS_SYNC(inode))
117 return ret;
118
119 /*
120 * Open question #2 --- should we force data to disk here too? If we
121 * don't, the only impact is that data=writeback filesystems won't
122 * flush data to disk automatically on IS_SYNC, only metadata (but
123 * historically, that is what ext2 has done.)
124 */
125
126force_commit:
127 err = ext4_force_commit(inode->i_sb);
128 if (err)
129 return err;
130 return ret;
131} 82}
132 83
133static struct vm_operations_struct ext4_file_vm_ops = { 84static struct vm_operations_struct ext4_file_vm_ops = {
@@ -207,7 +158,7 @@ const struct inode_operations ext4_file_inode_operations = {
207 .listxattr = ext4_listxattr, 158 .listxattr = ext4_listxattr,
208 .removexattr = generic_removexattr, 159 .removexattr = generic_removexattr,
209#endif 160#endif
210 .permission = ext4_permission, 161 .check_acl = ext4_check_acl,
211 .fallocate = ext4_fallocate, 162 .fallocate = ext4_fallocate,
212 .fiemap = ext4_fiemap, 163 .fiemap = ext4_fiemap,
213}; 164};
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index de04013d16ff..114abe5d2c1d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2536,7 +2536,7 @@ const struct inode_operations ext4_dir_inode_operations = {
2536 .listxattr = ext4_listxattr, 2536 .listxattr = ext4_listxattr,
2537 .removexattr = generic_removexattr, 2537 .removexattr = generic_removexattr,
2538#endif 2538#endif
2539 .permission = ext4_permission, 2539 .check_acl = ext4_check_acl,
2540 .fiemap = ext4_fiemap, 2540 .fiemap = ext4_fiemap,
2541}; 2541};
2542 2542
@@ -2548,5 +2548,5 @@ const struct inode_operations ext4_special_inode_operations = {
2548 .listxattr = ext4_listxattr, 2548 .listxattr = ext4_listxattr,
2549 .removexattr = generic_removexattr, 2549 .removexattr = generic_removexattr,
2550#endif 2550#endif
2551 .permission = ext4_permission, 2551 .check_acl = ext4_check_acl,
2552}; 2552};
diff --git a/fs/fat/file.c b/fs/fat/file.c
index f042b965c95c..e8c159de236b 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -176,8 +176,26 @@ static int fat_cont_expand(struct inode *inode, loff_t size)
176 176
177 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC; 177 inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
178 mark_inode_dirty(inode); 178 mark_inode_dirty(inode);
179 if (IS_SYNC(inode)) 179 if (IS_SYNC(inode)) {
180 err = sync_page_range_nolock(inode, mapping, start, count); 180 int err2;
181
182 /*
183 * Opencode syncing since we don't have a file open to use
184 * standard fsync path.
185 */
186 err = filemap_fdatawrite_range(mapping, start,
187 start + count - 1);
188 err2 = sync_mapping_buffers(mapping);
189 if (!err)
190 err = err2;
191 err2 = write_inode_now(inode, 1);
192 if (!err)
193 err = err2;
194 if (!err) {
195 err = filemap_fdatawait_range(mapping, start,
196 start + count - 1);
197 }
198 }
181out: 199out:
182 return err; 200 return err;
183} 201}
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index a6c20473dfd7..4e35be873e09 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -119,8 +119,8 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
119 MSDOS_I(inode)->i_start = new_dclus; 119 MSDOS_I(inode)->i_start = new_dclus;
120 MSDOS_I(inode)->i_logstart = new_dclus; 120 MSDOS_I(inode)->i_logstart = new_dclus;
121 /* 121 /*
122 * Since generic_osync_inode() synchronize later if 122 * Since generic_write_sync() synchronizes regular files later,
123 * this is not directory, we don't here. 123 * we sync here only directories.
124 */ 124 */
125 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) { 125 if (S_ISDIR(inode->i_mode) && IS_DIRSYNC(inode)) {
126 ret = fat_sync_inode(inode); 126 ret = fat_sync_inode(inode);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c54226be5294..628235cf44b5 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -19,171 +19,223 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/kthread.h>
23#include <linux/freezer.h>
22#include <linux/writeback.h> 24#include <linux/writeback.h>
23#include <linux/blkdev.h> 25#include <linux/blkdev.h>
24#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
25#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
26#include "internal.h" 28#include "internal.h"
27 29
30#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
28 31
29/** 32/*
30 * writeback_acquire - attempt to get exclusive writeback access to a device 33 * We don't actually have pdflush, but this one is exported though /proc...
31 * @bdi: the device's backing_dev_info structure
32 *
33 * It is a waste of resources to have more than one pdflush thread blocked on
34 * a single request queue. Exclusion at the request_queue level is obtained
35 * via a flag in the request_queue's backing_dev_info.state.
36 *
37 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
38 * unless they implement their own. Which is somewhat inefficient, as this
39 * may prevent concurrent writeback against multiple devices.
40 */ 34 */
41static int writeback_acquire(struct backing_dev_info *bdi) 35int nr_pdflush_threads;
36
37/*
38 * Work items for the bdi_writeback threads
39 */
40struct bdi_work {
41 struct list_head list;
42 struct list_head wait_list;
43 struct rcu_head rcu_head;
44
45 unsigned long seen;
46 atomic_t pending;
47
48 struct super_block *sb;
49 unsigned long nr_pages;
50 enum writeback_sync_modes sync_mode;
51
52 unsigned long state;
53};
54
55enum {
56 WS_USED_B = 0,
57 WS_ONSTACK_B,
58};
59
60#define WS_USED (1 << WS_USED_B)
61#define WS_ONSTACK (1 << WS_ONSTACK_B)
62
63static inline bool bdi_work_on_stack(struct bdi_work *work)
64{
65 return test_bit(WS_ONSTACK_B, &work->state);
66}
67
68static inline void bdi_work_init(struct bdi_work *work,
69 struct writeback_control *wbc)
70{
71 INIT_RCU_HEAD(&work->rcu_head);
72 work->sb = wbc->sb;
73 work->nr_pages = wbc->nr_to_write;
74 work->sync_mode = wbc->sync_mode;
75 work->state = WS_USED;
76}
77
78static inline void bdi_work_init_on_stack(struct bdi_work *work,
79 struct writeback_control *wbc)
42{ 80{
43 return !test_and_set_bit(BDI_pdflush, &bdi->state); 81 bdi_work_init(work, wbc);
82 work->state |= WS_ONSTACK;
44} 83}
45 84
46/** 85/**
47 * writeback_in_progress - determine whether there is writeback in progress 86 * writeback_in_progress - determine whether there is writeback in progress
48 * @bdi: the device's backing_dev_info structure. 87 * @bdi: the device's backing_dev_info structure.
49 * 88 *
50 * Determine whether there is writeback in progress against a backing device. 89 * Determine whether there is writeback waiting to be handled against a
90 * backing device.
51 */ 91 */
52int writeback_in_progress(struct backing_dev_info *bdi) 92int writeback_in_progress(struct backing_dev_info *bdi)
53{ 93{
54 return test_bit(BDI_pdflush, &bdi->state); 94 return !list_empty(&bdi->work_list);
55} 95}
56 96
57/** 97static void bdi_work_clear(struct bdi_work *work)
58 * writeback_release - relinquish exclusive writeback access against a device.
59 * @bdi: the device's backing_dev_info structure
60 */
61static void writeback_release(struct backing_dev_info *bdi)
62{ 98{
63 BUG_ON(!writeback_in_progress(bdi)); 99 clear_bit(WS_USED_B, &work->state);
64 clear_bit(BDI_pdflush, &bdi->state); 100 smp_mb__after_clear_bit();
101 wake_up_bit(&work->state, WS_USED_B);
65} 102}
66 103
67static noinline void block_dump___mark_inode_dirty(struct inode *inode) 104static void bdi_work_free(struct rcu_head *head)
68{ 105{
69 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 106 struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
70 struct dentry *dentry;
71 const char *name = "?";
72 107
73 dentry = d_find_alias(inode); 108 if (!bdi_work_on_stack(work))
74 if (dentry) { 109 kfree(work);
75 spin_lock(&dentry->d_lock); 110 else
76 name = (const char *) dentry->d_name.name; 111 bdi_work_clear(work);
77 }
78 printk(KERN_DEBUG
79 "%s(%d): dirtied inode %lu (%s) on %s\n",
80 current->comm, task_pid_nr(current), inode->i_ino,
81 name, inode->i_sb->s_id);
82 if (dentry) {
83 spin_unlock(&dentry->d_lock);
84 dput(dentry);
85 }
86 }
87} 112}
88 113
89/** 114static void wb_work_complete(struct bdi_work *work)
90 * __mark_inode_dirty - internal function
91 * @inode: inode to mark
92 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
93 * Mark an inode as dirty. Callers should use mark_inode_dirty or
94 * mark_inode_dirty_sync.
95 *
96 * Put the inode on the super block's dirty list.
97 *
98 * CAREFUL! We mark it dirty unconditionally, but move it onto the
99 * dirty list only if it is hashed or if it refers to a blockdev.
100 * If it was not hashed, it will never be added to the dirty list
101 * even if it is later hashed, as it will have been marked dirty already.
102 *
103 * In short, make sure you hash any inodes _before_ you start marking
104 * them dirty.
105 *
106 * This function *must* be atomic for the I_DIRTY_PAGES case -
107 * set_page_dirty() is called under spinlock in several places.
108 *
109 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
110 * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
111 * the kernel-internal blockdev inode represents the dirtying time of the
112 * blockdev's pages. This is why for I_DIRTY_PAGES we always use
113 * page->mapping->host, so the page-dirtying time is recorded in the internal
114 * blockdev inode.
115 */
116void __mark_inode_dirty(struct inode *inode, int flags)
117{ 115{
118 struct super_block *sb = inode->i_sb; 116 const enum writeback_sync_modes sync_mode = work->sync_mode;
119 117
120 /* 118 /*
121 * Don't do this for I_DIRTY_PAGES - that doesn't actually 119 * For allocated work, we can clear the done/seen bit right here.
122 * dirty the inode itself 120 * For on-stack work, we need to postpone both the clear and free
121 * to after the RCU grace period, since the stack could be invalidated
122 * as soon as bdi_work_clear() has done the wakeup.
123 */ 123 */
124 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 124 if (!bdi_work_on_stack(work))
125 if (sb->s_op->dirty_inode) 125 bdi_work_clear(work);
126 sb->s_op->dirty_inode(inode); 126 if (sync_mode == WB_SYNC_NONE || bdi_work_on_stack(work))
127 } 127 call_rcu(&work->rcu_head, bdi_work_free);
128}
128 129
130static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
131{
129 /* 132 /*
130 * make sure that changes are seen by all cpus before we test i_state 133 * The caller has retrieved the work arguments from this work,
131 * -- mikulas 134 * drop our reference. If this is the last ref, delete and free it
132 */ 135 */
133 smp_mb(); 136 if (atomic_dec_and_test(&work->pending)) {
134 137 struct backing_dev_info *bdi = wb->bdi;
135 /* avoid the locking if we can */
136 if ((inode->i_state & flags) == flags)
137 return;
138 138
139 if (unlikely(block_dump)) 139 spin_lock(&bdi->wb_lock);
140 block_dump___mark_inode_dirty(inode); 140 list_del_rcu(&work->list);
141 spin_unlock(&bdi->wb_lock);
141 142
142 spin_lock(&inode_lock); 143 wb_work_complete(work);
143 if ((inode->i_state & flags) != flags) { 144 }
144 const int was_dirty = inode->i_state & I_DIRTY; 145}
145 146
146 inode->i_state |= flags; 147static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
148{
149 if (work) {
150 work->seen = bdi->wb_mask;
151 BUG_ON(!work->seen);
152 atomic_set(&work->pending, bdi->wb_cnt);
153 BUG_ON(!bdi->wb_cnt);
147 154
148 /* 155 /*
149 * If the inode is being synced, just update its dirty state. 156 * Make sure stores are seen before it appears on the list
150 * The unlocker will place the inode on the appropriate
151 * superblock list, based upon its state.
152 */ 157 */
153 if (inode->i_state & I_SYNC) 158 smp_mb();
154 goto out;
155 159
156 /* 160 spin_lock(&bdi->wb_lock);
157 * Only add valid (hashed) inodes to the superblock's 161 list_add_tail_rcu(&work->list, &bdi->work_list);
158 * dirty list. Add blockdev inodes as well. 162 spin_unlock(&bdi->wb_lock);
159 */ 163 }
160 if (!S_ISBLK(inode->i_mode)) { 164
161 if (hlist_unhashed(&inode->i_hash)) 165 /*
162 goto out; 166 * If the default thread isn't there, make sure we add it. When
163 } 167 * it gets created and wakes up, we'll run this work.
164 if (inode->i_state & (I_FREEING|I_CLEAR)) 168 */
165 goto out; 169 if (unlikely(list_empty_careful(&bdi->wb_list)))
170 wake_up_process(default_backing_dev_info.wb.task);
171 else {
172 struct bdi_writeback *wb = &bdi->wb;
166 173
167 /* 174 /*
168 * If the inode was already on s_dirty/s_io/s_more_io, don't 175 * If we failed allocating the bdi work item, wake up the wb
169 * reposition it (that would break s_dirty time-ordering). 176 * thread always. As a safety precaution, it'll flush out
177 * everything
170 */ 178 */
171 if (!was_dirty) { 179 if (!wb_has_dirty_io(wb)) {
172 inode->dirtied_when = jiffies; 180 if (work)
173 list_move(&inode->i_list, &sb->s_dirty); 181 wb_clear_pending(wb, work);
174 } 182 } else if (wb->task)
183 wake_up_process(wb->task);
175 } 184 }
176out:
177 spin_unlock(&inode_lock);
178} 185}
179 186
180EXPORT_SYMBOL(__mark_inode_dirty); 187/*
188 * Used for on-stack allocated work items. The caller needs to wait until
189 * the wb threads have acked the work before it's safe to continue.
190 */
191static void bdi_wait_on_work_clear(struct bdi_work *work)
192{
193 wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
194 TASK_UNINTERRUPTIBLE);
195}
181 196
182static int write_inode(struct inode *inode, int sync) 197static struct bdi_work *bdi_alloc_work(struct writeback_control *wbc)
183{ 198{
184 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) 199 struct bdi_work *work;
185 return inode->i_sb->s_op->write_inode(inode, sync); 200
186 return 0; 201 work = kmalloc(sizeof(*work), GFP_ATOMIC);
202 if (work)
203 bdi_work_init(work, wbc);
204
205 return work;
206}
207
208void bdi_start_writeback(struct writeback_control *wbc)
209{
210 const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
211 struct bdi_work work_stack, *work = NULL;
212
213 if (!must_wait)
214 work = bdi_alloc_work(wbc);
215
216 if (!work) {
217 work = &work_stack;
218 bdi_work_init_on_stack(work, wbc);
219 }
220
221 bdi_queue_work(wbc->bdi, work);
222
223 /*
224 * If the sync mode is WB_SYNC_ALL, block waiting for the work to
225 * complete. If not, we only need to wait for the work to be started,
226 * if we allocated it on-stack. We use the same mechanism, if the
227 * wait bit is set in the bdi_work struct, then threads will not
228 * clear pending until after they are done.
229 *
230 * Note that work == &work_stack if must_wait is true, so we don't
231 * need to do call_rcu() here ever, since the completion path will
232 * have done that for us.
233 */
234 if (must_wait || work == &work_stack) {
235 bdi_wait_on_work_clear(work);
236 if (work != &work_stack)
237 call_rcu(&work->rcu_head, bdi_work_free);
238 }
187} 239}
188 240
189/* 241/*
@@ -191,31 +243,32 @@ static int write_inode(struct inode *inode, int sync)
191 * furthest end of its superblock's dirty-inode list. 243 * furthest end of its superblock's dirty-inode list.
192 * 244 *
193 * Before stamping the inode's ->dirtied_when, we check to see whether it is 245 * Before stamping the inode's ->dirtied_when, we check to see whether it is
194 * already the most-recently-dirtied inode on the s_dirty list. If that is 246 * already the most-recently-dirtied inode on the b_dirty list. If that is
195 * the case then the inode must have been redirtied while it was being written 247 * the case then the inode must have been redirtied while it was being written
196 * out and we don't reset its dirtied_when. 248 * out and we don't reset its dirtied_when.
197 */ 249 */
198static void redirty_tail(struct inode *inode) 250static void redirty_tail(struct inode *inode)
199{ 251{
200 struct super_block *sb = inode->i_sb; 252 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
201 253
202 if (!list_empty(&sb->s_dirty)) { 254 if (!list_empty(&wb->b_dirty)) {
203 struct inode *tail_inode; 255 struct inode *tail;
204 256
205 tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); 257 tail = list_entry(wb->b_dirty.next, struct inode, i_list);
206 if (time_before(inode->dirtied_when, 258 if (time_before(inode->dirtied_when, tail->dirtied_when))
207 tail_inode->dirtied_when))
208 inode->dirtied_when = jiffies; 259 inode->dirtied_when = jiffies;
209 } 260 }
210 list_move(&inode->i_list, &sb->s_dirty); 261 list_move(&inode->i_list, &wb->b_dirty);
211} 262}
212 263
213/* 264/*
214 * requeue inode for re-scanning after sb->s_io list is exhausted. 265 * requeue inode for re-scanning after bdi->b_io list is exhausted.
215 */ 266 */
216static void requeue_io(struct inode *inode) 267static void requeue_io(struct inode *inode)
217{ 268{
218 list_move(&inode->i_list, &inode->i_sb->s_more_io); 269 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
270
271 list_move(&inode->i_list, &wb->b_more_io);
219} 272}
220 273
221static void inode_sync_complete(struct inode *inode) 274static void inode_sync_complete(struct inode *inode)
@@ -262,20 +315,18 @@ static void move_expired_inodes(struct list_head *delaying_queue,
262/* 315/*
263 * Queue all expired dirty inodes for io, eldest first. 316 * Queue all expired dirty inodes for io, eldest first.
264 */ 317 */
265static void queue_io(struct super_block *sb, 318static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
266 unsigned long *older_than_this)
267{ 319{
268 list_splice_init(&sb->s_more_io, sb->s_io.prev); 320 list_splice_init(&wb->b_more_io, wb->b_io.prev);
269 move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this); 321 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
270} 322}
271 323
272int sb_has_dirty_inodes(struct super_block *sb) 324static int write_inode(struct inode *inode, int sync)
273{ 325{
274 return !list_empty(&sb->s_dirty) || 326 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
275 !list_empty(&sb->s_io) || 327 return inode->i_sb->s_op->write_inode(inode, sync);
276 !list_empty(&sb->s_more_io); 328 return 0;
277} 329}
278EXPORT_SYMBOL(sb_has_dirty_inodes);
279 330
280/* 331/*
281 * Wait for writeback on an inode to complete. 332 * Wait for writeback on an inode to complete.
@@ -322,11 +373,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
322 if (inode->i_state & I_SYNC) { 373 if (inode->i_state & I_SYNC) {
323 /* 374 /*
324 * If this inode is locked for writeback and we are not doing 375 * If this inode is locked for writeback and we are not doing
325 * writeback-for-data-integrity, move it to s_more_io so that 376 * writeback-for-data-integrity, move it to b_more_io so that
326 * writeback can proceed with the other inodes on s_io. 377 * writeback can proceed with the other inodes on s_io.
327 * 378 *
328 * We'll have another go at writing back this inode when we 379 * We'll have another go at writing back this inode when we
329 * completed a full scan of s_io. 380 * completed a full scan of b_io.
330 */ 381 */
331 if (!wait) { 382 if (!wait) {
332 requeue_io(inode); 383 requeue_io(inode);
@@ -371,11 +422,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
371 /* 422 /*
372 * We didn't write back all the pages. nfs_writepages() 423 * We didn't write back all the pages. nfs_writepages()
373 * sometimes bales out without doing anything. Redirty 424 * sometimes bales out without doing anything. Redirty
374 * the inode; Move it from s_io onto s_more_io/s_dirty. 425 * the inode; Move it from b_io onto b_more_io/b_dirty.
375 */ 426 */
376 /* 427 /*
377 * akpm: if the caller was the kupdate function we put 428 * akpm: if the caller was the kupdate function we put
378 * this inode at the head of s_dirty so it gets first 429 * this inode at the head of b_dirty so it gets first
379 * consideration. Otherwise, move it to the tail, for 430 * consideration. Otherwise, move it to the tail, for
380 * the reasons described there. I'm not really sure 431 * the reasons described there. I'm not really sure
381 * how much sense this makes. Presumably I had a good 432 * how much sense this makes. Presumably I had a good
@@ -385,7 +436,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
385 if (wbc->for_kupdate) { 436 if (wbc->for_kupdate) {
386 /* 437 /*
387 * For the kupdate function we move the inode 438 * For the kupdate function we move the inode
388 * to s_more_io so it will get more writeout as 439 * to b_more_io so it will get more writeout as
389 * soon as the queue becomes uncongested. 440 * soon as the queue becomes uncongested.
390 */ 441 */
391 inode->i_state |= I_DIRTY_PAGES; 442 inode->i_state |= I_DIRTY_PAGES;
@@ -434,50 +485,84 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
434} 485}
435 486
436/* 487/*
437 * Write out a superblock's list of dirty inodes. A wait will be performed 488 * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
438 * upon no inodes, all inodes or the final one, depending upon sync_mode. 489 * before calling writeback. So make sure that we do pin it, so it doesn't
439 * 490 * go away while we are writing inodes from it.
440 * If older_than_this is non-NULL, then only write out inodes which
441 * had their first dirtying at a time earlier than *older_than_this.
442 *
443 * If we're a pdflush thread, then implement pdflush collision avoidance
444 * against the entire list.
445 *
446 * If `bdi' is non-zero then we're being asked to writeback a specific queue.
447 * This function assumes that the blockdev superblock's inodes are backed by
448 * a variety of queues, so all inodes are searched. For other superblocks,
449 * assume that all inodes are backed by the same queue.
450 *
451 * FIXME: this linear search could get expensive with many fileystems. But
452 * how to fix? We need to go from an address_space to all inodes which share
453 * a queue with that address_space. (Easy: have a global "dirty superblocks"
454 * list).
455 * 491 *
456 * The inodes to be written are parked on sb->s_io. They are moved back onto 492 * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
457 * sb->s_dirty as they are selected for writing. This way, none can be missed 493 * 1 if we failed.
458 * on the writer throttling path, and we get decent balancing between many
459 * throttled threads: we don't want them all piling up on inode_sync_wait.
460 */ 494 */
461void generic_sync_sb_inodes(struct super_block *sb, 495static int pin_sb_for_writeback(struct writeback_control *wbc,
496 struct inode *inode)
497{
498 struct super_block *sb = inode->i_sb;
499
500 /*
501 * Caller must already hold the ref for this
502 */
503 if (wbc->sync_mode == WB_SYNC_ALL) {
504 WARN_ON(!rwsem_is_locked(&sb->s_umount));
505 return 0;
506 }
507
508 spin_lock(&sb_lock);
509 sb->s_count++;
510 if (down_read_trylock(&sb->s_umount)) {
511 if (sb->s_root) {
512 spin_unlock(&sb_lock);
513 return 0;
514 }
515 /*
516 * umounted, drop rwsem again and fall through to failure
517 */
518 up_read(&sb->s_umount);
519 }
520
521 sb->s_count--;
522 spin_unlock(&sb_lock);
523 return 1;
524}
525
526static void unpin_sb_for_writeback(struct writeback_control *wbc,
527 struct inode *inode)
528{
529 struct super_block *sb = inode->i_sb;
530
531 if (wbc->sync_mode == WB_SYNC_ALL)
532 return;
533
534 up_read(&sb->s_umount);
535 put_super(sb);
536}
537
538static void writeback_inodes_wb(struct bdi_writeback *wb,
462 struct writeback_control *wbc) 539 struct writeback_control *wbc)
463{ 540{
541 struct super_block *sb = wbc->sb;
542 const int is_blkdev_sb = sb_is_blkdev_sb(sb);
464 const unsigned long start = jiffies; /* livelock avoidance */ 543 const unsigned long start = jiffies; /* livelock avoidance */
465 int sync = wbc->sync_mode == WB_SYNC_ALL;
466 544
467 spin_lock(&inode_lock); 545 spin_lock(&inode_lock);
468 if (!wbc->for_kupdate || list_empty(&sb->s_io))
469 queue_io(sb, wbc->older_than_this);
470 546
471 while (!list_empty(&sb->s_io)) { 547 if (!wbc->for_kupdate || list_empty(&wb->b_io))
472 struct inode *inode = list_entry(sb->s_io.prev, 548 queue_io(wb, wbc->older_than_this);
549
550 while (!list_empty(&wb->b_io)) {
551 struct inode *inode = list_entry(wb->b_io.prev,
473 struct inode, i_list); 552 struct inode, i_list);
474 struct address_space *mapping = inode->i_mapping;
475 struct backing_dev_info *bdi = mapping->backing_dev_info;
476 long pages_skipped; 553 long pages_skipped;
477 554
478 if (!bdi_cap_writeback_dirty(bdi)) { 555 /*
556 * super block given and doesn't match, skip this inode
557 */
558 if (sb && sb != inode->i_sb) {
559 redirty_tail(inode);
560 continue;
561 }
562
563 if (!bdi_cap_writeback_dirty(wb->bdi)) {
479 redirty_tail(inode); 564 redirty_tail(inode);
480 if (sb_is_blkdev_sb(sb)) { 565 if (is_blkdev_sb) {
481 /* 566 /*
482 * Dirty memory-backed blockdev: the ramdisk 567 * Dirty memory-backed blockdev: the ramdisk
483 * driver does this. Skip just this inode 568 * driver does this. Skip just this inode
@@ -497,21 +582,14 @@ void generic_sync_sb_inodes(struct super_block *sb,
497 continue; 582 continue;
498 } 583 }
499 584
500 if (wbc->nonblocking && bdi_write_congested(bdi)) { 585 if (wbc->nonblocking && bdi_write_congested(wb->bdi)) {
501 wbc->encountered_congestion = 1; 586 wbc->encountered_congestion = 1;
502 if (!sb_is_blkdev_sb(sb)) 587 if (!is_blkdev_sb)
503 break; /* Skip a congested fs */ 588 break; /* Skip a congested fs */
504 requeue_io(inode); 589 requeue_io(inode);
505 continue; /* Skip a congested blockdev */ 590 continue; /* Skip a congested blockdev */
506 } 591 }
507 592
508 if (wbc->bdi && bdi != wbc->bdi) {
509 if (!sb_is_blkdev_sb(sb))
510 break; /* fs has the wrong queue */
511 requeue_io(inode);
512 continue; /* blockdev has wrong queue */
513 }
514
515 /* 593 /*
516 * Was this inode dirtied after sync_sb_inodes was called? 594 * Was this inode dirtied after sync_sb_inodes was called?
517 * This keeps sync from extra jobs and livelock. 595 * This keeps sync from extra jobs and livelock.
@@ -519,16 +597,16 @@ void generic_sync_sb_inodes(struct super_block *sb,
519 if (inode_dirtied_after(inode, start)) 597 if (inode_dirtied_after(inode, start))
520 break; 598 break;
521 599
522 /* Is another pdflush already flushing this queue? */ 600 if (pin_sb_for_writeback(wbc, inode)) {
523 if (current_is_pdflush() && !writeback_acquire(bdi)) 601 requeue_io(inode);
524 break; 602 continue;
603 }
525 604
526 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); 605 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
527 __iget(inode); 606 __iget(inode);
528 pages_skipped = wbc->pages_skipped; 607 pages_skipped = wbc->pages_skipped;
529 writeback_single_inode(inode, wbc); 608 writeback_single_inode(inode, wbc);
530 if (current_is_pdflush()) 609 unpin_sb_for_writeback(wbc, inode);
531 writeback_release(bdi);
532 if (wbc->pages_skipped != pages_skipped) { 610 if (wbc->pages_skipped != pages_skipped) {
533 /* 611 /*
534 * writeback is not making progress due to locked 612 * writeback is not making progress due to locked
@@ -544,144 +622,571 @@ void generic_sync_sb_inodes(struct super_block *sb,
544 wbc->more_io = 1; 622 wbc->more_io = 1;
545 break; 623 break;
546 } 624 }
547 if (!list_empty(&sb->s_more_io)) 625 if (!list_empty(&wb->b_more_io))
548 wbc->more_io = 1; 626 wbc->more_io = 1;
549 } 627 }
550 628
551 if (sync) { 629 spin_unlock(&inode_lock);
552 struct inode *inode, *old_inode = NULL; 630 /* Leave any unwritten inodes on b_io */
631}
632
633void writeback_inodes_wbc(struct writeback_control *wbc)
634{
635 struct backing_dev_info *bdi = wbc->bdi;
636
637 writeback_inodes_wb(&bdi->wb, wbc);
638}
639
640/*
641 * The maximum number of pages to writeout in a single bdi flush/kupdate
642 * operation. We do this so we don't hold I_SYNC against an inode for
643 * enormous amounts of time, which would block a userspace task which has
644 * been forced to throttle against that inode. Also, the code reevaluates
645 * the dirty each time it has written this many pages.
646 */
647#define MAX_WRITEBACK_PAGES 1024
648
649static inline bool over_bground_thresh(void)
650{
651 unsigned long background_thresh, dirty_thresh;
652
653 get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
654
655 return (global_page_state(NR_FILE_DIRTY) +
656 global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
657}
658
659/*
660 * Explicit flushing or periodic writeback of "old" data.
661 *
662 * Define "old": the first time one of an inode's pages is dirtied, we mark the
663 * dirtying-time in the inode's address_space. So this periodic writeback code
664 * just walks the superblock inode list, writing back any inodes which are
665 * older than a specific point in time.
666 *
667 * Try to run once per dirty_writeback_interval. But if a writeback event
668 * takes longer than a dirty_writeback_interval interval, then leave a
669 * one-second gap.
670 *
671 * older_than_this takes precedence over nr_to_write. So we'll only write back
672 * all dirty pages if they are all attached to "old" mappings.
673 */
674static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
675 struct super_block *sb,
676 enum writeback_sync_modes sync_mode, int for_kupdate)
677{
678 struct writeback_control wbc = {
679 .bdi = wb->bdi,
680 .sb = sb,
681 .sync_mode = sync_mode,
682 .older_than_this = NULL,
683 .for_kupdate = for_kupdate,
684 .range_cyclic = 1,
685 };
686 unsigned long oldest_jif;
687 long wrote = 0;
688
689 if (wbc.for_kupdate) {
690 wbc.older_than_this = &oldest_jif;
691 oldest_jif = jiffies -
692 msecs_to_jiffies(dirty_expire_interval * 10);
693 }
694
695 for (;;) {
696 /*
697 * Don't flush anything for non-integrity writeback where
698 * no nr_pages was given
699 */
700 if (!for_kupdate && nr_pages <= 0 && sync_mode == WB_SYNC_NONE)
701 break;
553 702
554 /* 703 /*
555 * Data integrity sync. Must wait for all pages under writeback, 704 * If no specific pages were given and this is just a
556 * because there may have been pages dirtied before our sync 705 * periodic background writeout and we are below the
557 * call, but which had writeout started before we write it out. 706 * background dirty threshold, don't do anything
558 * In which case, the inode may not be on the dirty list, but
559 * we still have to wait for that writeout.
560 */ 707 */
561 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 708 if (for_kupdate && nr_pages <= 0 && !over_bground_thresh())
562 struct address_space *mapping; 709 break;
563 710
564 if (inode->i_state & 711 wbc.more_io = 0;
565 (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 712 wbc.encountered_congestion = 0;
566 continue; 713 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
567 mapping = inode->i_mapping; 714 wbc.pages_skipped = 0;
568 if (mapping->nrpages == 0) 715 writeback_inodes_wb(wb, &wbc);
716 nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
717 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
718
719 /*
720 * If we ran out of stuff to write, bail unless more_io got set
721 */
722 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
723 if (wbc.more_io && !wbc.for_kupdate)
569 continue; 724 continue;
570 __iget(inode); 725 break;
571 spin_unlock(&inode_lock); 726 }
727 }
728
729 return wrote;
730}
731
732/*
733 * Return the next bdi_work struct that hasn't been processed by this
734 * wb thread yet
735 */
736static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
737 struct bdi_writeback *wb)
738{
739 struct bdi_work *work, *ret = NULL;
740
741 rcu_read_lock();
742
743 list_for_each_entry_rcu(work, &bdi->work_list, list) {
744 if (!test_and_clear_bit(wb->nr, &work->seen))
745 continue;
746
747 ret = work;
748 break;
749 }
750
751 rcu_read_unlock();
752 return ret;
753}
754
755static long wb_check_old_data_flush(struct bdi_writeback *wb)
756{
757 unsigned long expired;
758 long nr_pages;
759
760 expired = wb->last_old_flush +
761 msecs_to_jiffies(dirty_writeback_interval * 10);
762 if (time_before(jiffies, expired))
763 return 0;
764
765 wb->last_old_flush = jiffies;
766 nr_pages = global_page_state(NR_FILE_DIRTY) +
767 global_page_state(NR_UNSTABLE_NFS) +
768 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
769
770 if (nr_pages)
771 return wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1);
772
773 return 0;
774}
775
776/*
777 * Retrieve work items and do the writeback they describe
778 */
779long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
780{
781 struct backing_dev_info *bdi = wb->bdi;
782 struct bdi_work *work;
783 long nr_pages, wrote = 0;
784
785 while ((work = get_next_work_item(bdi, wb)) != NULL) {
786 enum writeback_sync_modes sync_mode;
787
788 nr_pages = work->nr_pages;
789
790 /*
791 * Override sync mode, in case we must wait for completion
792 */
793 if (force_wait)
794 work->sync_mode = sync_mode = WB_SYNC_ALL;
795 else
796 sync_mode = work->sync_mode;
797
798 /*
799 * If this isn't a data integrity operation, just notify
800 * that we have seen this work and we are now starting it.
801 */
802 if (sync_mode == WB_SYNC_NONE)
803 wb_clear_pending(wb, work);
804
805 wrote += wb_writeback(wb, nr_pages, work->sb, sync_mode, 0);
806
807 /*
808 * This is a data integrity writeback, so only do the
809 * notification when we have completed the work.
810 */
811 if (sync_mode == WB_SYNC_ALL)
812 wb_clear_pending(wb, work);
813 }
814
815 /*
816 * Check for periodic writeback, kupdated() style
817 */
818 wrote += wb_check_old_data_flush(wb);
819
820 return wrote;
821}
822
823/*
824 * Handle writeback of dirty data for the device backed by this bdi. Also
825 * wakes up periodically and does kupdated style flushing.
826 */
827int bdi_writeback_task(struct bdi_writeback *wb)
828{
829 unsigned long last_active = jiffies;
830 unsigned long wait_jiffies = -1UL;
831 long pages_written;
832
833 while (!kthread_should_stop()) {
834 pages_written = wb_do_writeback(wb, 0);
835
836 if (pages_written)
837 last_active = jiffies;
838 else if (wait_jiffies != -1UL) {
839 unsigned long max_idle;
840
572 /* 841 /*
573 * We hold a reference to 'inode' so it couldn't have 842 * Longest period of inactivity that we tolerate. If we
574 * been removed from s_inodes list while we dropped the 843 * see dirty data again later, the task will get
575 * inode_lock. We cannot iput the inode now as we can 844 * recreated automatically.
576 * be holding the last reference and we cannot iput it
577 * under inode_lock. So we keep the reference and iput
578 * it later.
579 */ 845 */
580 iput(old_inode); 846 max_idle = max(5UL * 60 * HZ, wait_jiffies);
581 old_inode = inode; 847 if (time_after(jiffies, max_idle + last_active))
848 break;
849 }
850
851 wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
852 set_current_state(TASK_INTERRUPTIBLE);
853 schedule_timeout(wait_jiffies);
854 try_to_freeze();
855 }
856
857 return 0;
858}
859
860/*
861 * Schedule writeback for all backing devices. Expensive! If this is a data
862 * integrity operation, writeback will be complete when this returns. If
863 * we are simply called for WB_SYNC_NONE, then writeback will merely be
864 * scheduled to run.
865 */
866static void bdi_writeback_all(struct writeback_control *wbc)
867{
868 const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
869 struct backing_dev_info *bdi;
870 struct bdi_work *work;
871 LIST_HEAD(list);
872
873restart:
874 spin_lock(&bdi_lock);
875
876 list_for_each_entry(bdi, &bdi_list, bdi_list) {
877 struct bdi_work *work;
878
879 if (!bdi_has_dirty_io(bdi))
880 continue;
582 881
583 filemap_fdatawait(mapping); 882 /*
883 * If work allocation fails, do the writes inline. We drop
884 * the lock and restart the list writeout. This should be OK,
885 * since this happens rarely and because the writeout should
886 * eventually make more free memory available.
887 */
888 work = bdi_alloc_work(wbc);
889 if (!work) {
890 struct writeback_control __wbc;
584 891
585 cond_resched(); 892 /*
893 * Not a data integrity writeout, just continue
894 */
895 if (!must_wait)
896 continue;
586 897
587 spin_lock(&inode_lock); 898 spin_unlock(&bdi_lock);
899 __wbc = *wbc;
900 __wbc.bdi = bdi;
901 writeback_inodes_wbc(&__wbc);
902 goto restart;
588 } 903 }
589 spin_unlock(&inode_lock); 904 if (must_wait)
590 iput(old_inode); 905 list_add_tail(&work->wait_list, &list);
591 } else 906
592 spin_unlock(&inode_lock); 907 bdi_queue_work(bdi, work);
908 }
909
910 spin_unlock(&bdi_lock);
593 911
594 return; /* Leave any unwritten inodes on s_io */ 912 /*
913 * If this is for WB_SYNC_ALL, wait for pending work to complete
914 * before returning.
915 */
916 while (!list_empty(&list)) {
917 work = list_entry(list.next, struct bdi_work, wait_list);
918 list_del(&work->wait_list);
919 bdi_wait_on_work_clear(work);
920 call_rcu(&work->rcu_head, bdi_work_free);
921 }
595} 922}
596EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
597 923
598static void sync_sb_inodes(struct super_block *sb, 924/*
599 struct writeback_control *wbc) 925 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
926 * the whole world.
927 */
928void wakeup_flusher_threads(long nr_pages)
600{ 929{
601 generic_sync_sb_inodes(sb, wbc); 930 struct writeback_control wbc = {
931 .sync_mode = WB_SYNC_NONE,
932 .older_than_this = NULL,
933 .range_cyclic = 1,
934 };
935
936 if (nr_pages == 0)
937 nr_pages = global_page_state(NR_FILE_DIRTY) +
938 global_page_state(NR_UNSTABLE_NFS);
939 wbc.nr_to_write = nr_pages;
940 bdi_writeback_all(&wbc);
602} 941}
603 942
604/* 943static noinline void block_dump___mark_inode_dirty(struct inode *inode)
605 * Start writeback of dirty pagecache data against all unlocked inodes. 944{
945 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
946 struct dentry *dentry;
947 const char *name = "?";
948
949 dentry = d_find_alias(inode);
950 if (dentry) {
951 spin_lock(&dentry->d_lock);
952 name = (const char *) dentry->d_name.name;
953 }
954 printk(KERN_DEBUG
955 "%s(%d): dirtied inode %lu (%s) on %s\n",
956 current->comm, task_pid_nr(current), inode->i_ino,
957 name, inode->i_sb->s_id);
958 if (dentry) {
959 spin_unlock(&dentry->d_lock);
960 dput(dentry);
961 }
962 }
963}
964
965/**
966 * __mark_inode_dirty - internal function
967 * @inode: inode to mark
968 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
969 * Mark an inode as dirty. Callers should use mark_inode_dirty or
970 * mark_inode_dirty_sync.
971 *
972 * Put the inode on the super block's dirty list.
973 *
974 * CAREFUL! We mark it dirty unconditionally, but move it onto the
975 * dirty list only if it is hashed or if it refers to a blockdev.
976 * If it was not hashed, it will never be added to the dirty list
977 * even if it is later hashed, as it will have been marked dirty already.
606 * 978 *
607 * Note: 979 * In short, make sure you hash any inodes _before_ you start marking
608 * We don't need to grab a reference to superblock here. If it has non-empty 980 * them dirty.
609 * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
610 * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all
611 * empty. Since __sync_single_inode() regains inode_lock before it finally moves
612 * inode from superblock lists we are OK.
613 * 981 *
614 * If `older_than_this' is non-zero then only flush inodes which have a 982 * This function *must* be atomic for the I_DIRTY_PAGES case -
615 * flushtime older than *older_than_this. 983 * set_page_dirty() is called under spinlock in several places.
616 * 984 *
617 * If `bdi' is non-zero then we will scan the first inode against each 985 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
618 * superblock until we find the matching ones. One group will be the dirty 986 * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
619 * inodes against a filesystem. Then when we hit the dummy blockdev superblock, 987 * the kernel-internal blockdev inode represents the dirtying time of the
620 * sync_sb_inodes will seekout the blockdev which matches `bdi'. Maybe not 988 * blockdev's pages. This is why for I_DIRTY_PAGES we always use
621 * super-efficient but we're about to do a ton of I/O... 989 * page->mapping->host, so the page-dirtying time is recorded in the internal
990 * blockdev inode.
622 */ 991 */
623void 992void __mark_inode_dirty(struct inode *inode, int flags)
624writeback_inodes(struct writeback_control *wbc)
625{ 993{
626 struct super_block *sb; 994 struct super_block *sb = inode->i_sb;
627 995
628 might_sleep(); 996 /*
629 spin_lock(&sb_lock); 997 * Don't do this for I_DIRTY_PAGES - that doesn't actually
630restart: 998 * dirty the inode itself
631 list_for_each_entry_reverse(sb, &super_blocks, s_list) { 999 */
632 if (sb_has_dirty_inodes(sb)) { 1000 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
633 /* we're making our own get_super here */ 1001 if (sb->s_op->dirty_inode)
634 sb->s_count++; 1002 sb->s_op->dirty_inode(inode);
635 spin_unlock(&sb_lock); 1003 }
636 /* 1004
637 * If we can't get the readlock, there's no sense in 1005 /*
638 * waiting around, most of the time the FS is going to 1006 * make sure that changes are seen by all cpus before we test i_state
639 * be unmounted by the time it is released. 1007 * -- mikulas
640 */ 1008 */
641 if (down_read_trylock(&sb->s_umount)) { 1009 smp_mb();
642 if (sb->s_root) 1010
643 sync_sb_inodes(sb, wbc); 1011 /* avoid the locking if we can */
644 up_read(&sb->s_umount); 1012 if ((inode->i_state & flags) == flags)
1013 return;
1014
1015 if (unlikely(block_dump))
1016 block_dump___mark_inode_dirty(inode);
1017
1018 spin_lock(&inode_lock);
1019 if ((inode->i_state & flags) != flags) {
1020 const int was_dirty = inode->i_state & I_DIRTY;
1021
1022 inode->i_state |= flags;
1023
1024 /*
1025 * If the inode is being synced, just update its dirty state.
1026 * The unlocker will place the inode on the appropriate
1027 * superblock list, based upon its state.
1028 */
1029 if (inode->i_state & I_SYNC)
1030 goto out;
1031
1032 /*
1033 * Only add valid (hashed) inodes to the superblock's
1034 * dirty list. Add blockdev inodes as well.
1035 */
1036 if (!S_ISBLK(inode->i_mode)) {
1037 if (hlist_unhashed(&inode->i_hash))
1038 goto out;
1039 }
1040 if (inode->i_state & (I_FREEING|I_CLEAR))
1041 goto out;
1042
1043 /*
1044 * If the inode was already on b_dirty/b_io/b_more_io, don't
1045 * reposition it (that would break b_dirty time-ordering).
1046 */
1047 if (!was_dirty) {
1048 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
1049 struct backing_dev_info *bdi = wb->bdi;
1050
1051 if (bdi_cap_writeback_dirty(bdi) &&
1052 !test_bit(BDI_registered, &bdi->state)) {
1053 WARN_ON(1);
1054 printk(KERN_ERR "bdi-%s not registered\n",
1055 bdi->name);
645 } 1056 }
646 spin_lock(&sb_lock); 1057
647 if (__put_super_and_need_restart(sb)) 1058 inode->dirtied_when = jiffies;
648 goto restart; 1059 list_move(&inode->i_list, &wb->b_dirty);
649 } 1060 }
650 if (wbc->nr_to_write <= 0)
651 break;
652 } 1061 }
653 spin_unlock(&sb_lock); 1062out:
1063 spin_unlock(&inode_lock);
654} 1064}
1065EXPORT_SYMBOL(__mark_inode_dirty);
655 1066
656/* 1067/*
657 * writeback and wait upon the filesystem's dirty inodes. The caller will 1068 * Write out a superblock's list of dirty inodes. A wait will be performed
658 * do this in two passes - one to write, and one to wait. 1069 * upon no inodes, all inodes or the final one, depending upon sync_mode.
1070 *
1071 * If older_than_this is non-NULL, then only write out inodes which
1072 * had their first dirtying at a time earlier than *older_than_this.
1073 *
1074 * If we're a pdlfush thread, then implement pdflush collision avoidance
1075 * against the entire list.
659 * 1076 *
660 * A finite limit is set on the number of pages which will be written. 1077 * If `bdi' is non-zero then we're being asked to writeback a specific queue.
661 * To prevent infinite livelock of sys_sync(). 1078 * This function assumes that the blockdev superblock's inodes are backed by
1079 * a variety of queues, so all inodes are searched. For other superblocks,
1080 * assume that all inodes are backed by the same queue.
662 * 1081 *
663 * We add in the number of potentially dirty inodes, because each inode write 1082 * The inodes to be written are parked on bdi->b_io. They are moved back onto
664 * can dirty pagecache in the underlying blockdev. 1083 * bdi->b_dirty as they are selected for writing. This way, none can be missed
1084 * on the writer throttling path, and we get decent balancing between many
1085 * throttled threads: we don't want them all piling up on inode_sync_wait.
1086 */
1087static void wait_sb_inodes(struct writeback_control *wbc)
1088{
1089 struct inode *inode, *old_inode = NULL;
1090
1091 /*
1092 * We need to be protected against the filesystem going from
1093 * r/o to r/w or vice versa.
1094 */
1095 WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount));
1096
1097 spin_lock(&inode_lock);
1098
1099 /*
1100 * Data integrity sync. Must wait for all pages under writeback,
1101 * because there may have been pages dirtied before our sync
1102 * call, but which had writeout started before we write it out.
1103 * In which case, the inode may not be on the dirty list, but
1104 * we still have to wait for that writeout.
1105 */
1106 list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) {
1107 struct address_space *mapping;
1108
1109 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
1110 continue;
1111 mapping = inode->i_mapping;
1112 if (mapping->nrpages == 0)
1113 continue;
1114 __iget(inode);
1115 spin_unlock(&inode_lock);
1116 /*
1117 * We hold a reference to 'inode' so it couldn't have
1118 * been removed from s_inodes list while we dropped the
1119 * inode_lock. We cannot iput the inode now as we can
1120 * be holding the last reference and we cannot iput it
1121 * under inode_lock. So we keep the reference and iput
1122 * it later.
1123 */
1124 iput(old_inode);
1125 old_inode = inode;
1126
1127 filemap_fdatawait(mapping);
1128
1129 cond_resched();
1130
1131 spin_lock(&inode_lock);
1132 }
1133 spin_unlock(&inode_lock);
1134 iput(old_inode);
1135}
1136
1137/**
1138 * writeback_inodes_sb - writeback dirty inodes from given super_block
1139 * @sb: the superblock
1140 *
1141 * Start writeback on some inodes on this super_block. No guarantees are made
1142 * on how many (if any) will be written, and this function does not wait
1143 * for IO completion of submitted IO. The number of pages submitted is
1144 * returned.
665 */ 1145 */
666void sync_inodes_sb(struct super_block *sb, int wait) 1146long writeback_inodes_sb(struct super_block *sb)
667{ 1147{
668 struct writeback_control wbc = { 1148 struct writeback_control wbc = {
669 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, 1149 .sb = sb,
1150 .sync_mode = WB_SYNC_NONE,
670 .range_start = 0, 1151 .range_start = 0,
671 .range_end = LLONG_MAX, 1152 .range_end = LLONG_MAX,
672 }; 1153 };
1154 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1155 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1156 long nr_to_write;
673 1157
674 if (!wait) { 1158 nr_to_write = nr_dirty + nr_unstable +
675 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
676 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
677
678 wbc.nr_to_write = nr_dirty + nr_unstable +
679 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 1159 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
680 } else
681 wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
682 1160
683 sync_sb_inodes(sb, &wbc); 1161 wbc.nr_to_write = nr_to_write;
1162 bdi_writeback_all(&wbc);
1163 return nr_to_write - wbc.nr_to_write;
684} 1164}
1165EXPORT_SYMBOL(writeback_inodes_sb);
1166
1167/**
1168 * sync_inodes_sb - sync sb inode pages
1169 * @sb: the superblock
1170 *
1171 * This function writes and waits on any dirty inode belonging to this
1172 * super_block. The number of pages synced is returned.
1173 */
1174long sync_inodes_sb(struct super_block *sb)
1175{
1176 struct writeback_control wbc = {
1177 .sb = sb,
1178 .sync_mode = WB_SYNC_ALL,
1179 .range_start = 0,
1180 .range_end = LLONG_MAX,
1181 };
1182 long nr_to_write = LONG_MAX; /* doesn't actually matter */
1183
1184 wbc.nr_to_write = nr_to_write;
1185 bdi_writeback_all(&wbc);
1186 wait_sb_inodes(&wbc);
1187 return nr_to_write - wbc.nr_to_write;
1188}
1189EXPORT_SYMBOL(sync_inodes_sb);
685 1190
686/** 1191/**
687 * write_inode_now - write an inode to disk 1192 * write_inode_now - write an inode to disk
@@ -737,57 +1242,3 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
737 return ret; 1242 return ret;
738} 1243}
739EXPORT_SYMBOL(sync_inode); 1244EXPORT_SYMBOL(sync_inode);
740
741/**
742 * generic_osync_inode - flush all dirty data for a given inode to disk
743 * @inode: inode to write
744 * @mapping: the address_space that should be flushed
745 * @what: what to write and wait upon
746 *
747 * This can be called by file_write functions for files which have the
748 * O_SYNC flag set, to flush dirty writes to disk.
749 *
750 * @what is a bitmask, specifying which part of the inode's data should be
751 * written and waited upon.
752 *
753 * OSYNC_DATA: i_mapping's dirty data
754 * OSYNC_METADATA: the buffers at i_mapping->private_list
755 * OSYNC_INODE: the inode itself
756 */
757
758int generic_osync_inode(struct inode *inode, struct address_space *mapping, int what)
759{
760 int err = 0;
761 int need_write_inode_now = 0;
762 int err2;
763
764 if (what & OSYNC_DATA)
765 err = filemap_fdatawrite(mapping);
766 if (what & (OSYNC_METADATA|OSYNC_DATA)) {
767 err2 = sync_mapping_buffers(mapping);
768 if (!err)
769 err = err2;
770 }
771 if (what & OSYNC_DATA) {
772 err2 = filemap_fdatawait(mapping);
773 if (!err)
774 err = err2;
775 }
776
777 spin_lock(&inode_lock);
778 if ((inode->i_state & I_DIRTY) &&
779 ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
780 need_write_inode_now = 1;
781 spin_unlock(&inode_lock);
782
783 if (need_write_inode_now) {
784 err2 = write_inode_now(inode, 1);
785 if (!err)
786 err = err2;
787 }
788 else
789 inode_sync_wait(inode);
790
791 return err;
792}
793EXPORT_SYMBOL(generic_osync_inode);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f91ccc4a189d..4567db6f9430 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -801,6 +801,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
801{ 801{
802 int err; 802 int err;
803 803
804 fc->bdi.name = "fuse";
804 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 805 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
805 fc->bdi.unplug_io_fn = default_unplug_io_fn; 806 fc->bdi.unplug_io_fn = default_unplug_io_fn;
806 /* fuse does it's own writeback accounting */ 807 /* fuse does it's own writeback accounting */
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index 3da2f1f4f738..21f7e46da4c0 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,6 +1,6 @@
1EXTRA_CFLAGS := -I$(src) 1EXTRA_CFLAGS := -I$(src)
2obj-$(CONFIG_GFS2_FS) += gfs2.o 2obj-$(CONFIG_GFS2_FS) += gfs2.o
3gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \ 3gfs2-y := acl.o bmap.o dir.o xattr.o glock.o \
4 glops.o inode.o log.o lops.o main.o meta_io.o \ 4 glops.o inode.o log.o lops.o main.o meta_io.o \
5 aops.o dentry.o export.o file.o \ 5 aops.o dentry.o export.o file.o \
6 ops_fstype.o ops_inode.o quota.o \ 6 ops_fstype.o ops_inode.o quota.o \
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index fa881bdc3d85..3fc4e3ac7d84 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -19,8 +19,7 @@
19#include "gfs2.h" 19#include "gfs2.h"
20#include "incore.h" 20#include "incore.h"
21#include "acl.h" 21#include "acl.h"
22#include "eaops.h" 22#include "xattr.h"
23#include "eattr.h"
24#include "glock.h" 23#include "glock.h"
25#include "inode.h" 24#include "inode.h"
26#include "meta_io.h" 25#include "meta_io.h"
@@ -31,8 +30,7 @@
31#define ACL_DEFAULT 0 30#define ACL_DEFAULT 0
32 31
33int gfs2_acl_validate_set(struct gfs2_inode *ip, int access, 32int gfs2_acl_validate_set(struct gfs2_inode *ip, int access,
34 struct gfs2_ea_request *er, 33 struct gfs2_ea_request *er, int *remove, mode_t *mode)
35 int *remove, mode_t *mode)
36{ 34{
37 struct posix_acl *acl; 35 struct posix_acl *acl;
38 int error; 36 int error;
@@ -83,30 +81,20 @@ int gfs2_acl_validate_remove(struct gfs2_inode *ip, int access)
83 return 0; 81 return 0;
84} 82}
85 83
86static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl, 84static int acl_get(struct gfs2_inode *ip, const char *name,
87 struct gfs2_ea_location *el, char **data, unsigned int *len) 85 struct posix_acl **acl, struct gfs2_ea_location *el,
86 char **datap, unsigned int *lenp)
88{ 87{
89 struct gfs2_ea_request er; 88 char *data;
90 struct gfs2_ea_location el_this; 89 unsigned int len;
91 int error; 90 int error;
92 91
92 el->el_bh = NULL;
93
93 if (!ip->i_eattr) 94 if (!ip->i_eattr)
94 return 0; 95 return 0;
95 96
96 memset(&er, 0, sizeof(struct gfs2_ea_request)); 97 error = gfs2_ea_find(ip, GFS2_EATYPE_SYS, name, el);
97 if (access) {
98 er.er_name = GFS2_POSIX_ACL_ACCESS;
99 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
100 } else {
101 er.er_name = GFS2_POSIX_ACL_DEFAULT;
102 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN;
103 }
104 er.er_type = GFS2_EATYPE_SYS;
105
106 if (!el)
107 el = &el_this;
108
109 error = gfs2_ea_find(ip, &er, el);
110 if (error) 98 if (error)
111 return error; 99 return error;
112 if (!el->el_ea) 100 if (!el->el_ea)
@@ -114,32 +102,31 @@ static int acl_get(struct gfs2_inode *ip, int access, struct posix_acl **acl,
114 if (!GFS2_EA_DATA_LEN(el->el_ea)) 102 if (!GFS2_EA_DATA_LEN(el->el_ea))
115 goto out; 103 goto out;
116 104
117 er.er_data_len = GFS2_EA_DATA_LEN(el->el_ea); 105 len = GFS2_EA_DATA_LEN(el->el_ea);
118 er.er_data = kmalloc(er.er_data_len, GFP_NOFS); 106 data = kmalloc(len, GFP_NOFS);
119 error = -ENOMEM; 107 error = -ENOMEM;
120 if (!er.er_data) 108 if (!data)
121 goto out; 109 goto out;
122 110
123 error = gfs2_ea_get_copy(ip, el, er.er_data); 111 error = gfs2_ea_get_copy(ip, el, data, len);
124 if (error) 112 if (error < 0)
125 goto out_kfree; 113 goto out_kfree;
114 error = 0;
126 115
127 if (acl) { 116 if (acl) {
128 *acl = posix_acl_from_xattr(er.er_data, er.er_data_len); 117 *acl = posix_acl_from_xattr(data, len);
129 if (IS_ERR(*acl)) 118 if (IS_ERR(*acl))
130 error = PTR_ERR(*acl); 119 error = PTR_ERR(*acl);
131 } 120 }
132 121
133out_kfree: 122out_kfree:
134 if (error || !data) 123 if (error || !datap) {
135 kfree(er.er_data); 124 kfree(data);
136 else { 125 } else {
137 *data = er.er_data; 126 *datap = data;
138 *len = er.er_data_len; 127 *lenp = len;
139 } 128 }
140out: 129out:
141 if (error || el == &el_this)
142 brelse(el->el_bh);
143 return error; 130 return error;
144} 131}
145 132
@@ -153,10 +140,12 @@ out:
153 140
154int gfs2_check_acl(struct inode *inode, int mask) 141int gfs2_check_acl(struct inode *inode, int mask)
155{ 142{
143 struct gfs2_ea_location el;
156 struct posix_acl *acl = NULL; 144 struct posix_acl *acl = NULL;
157 int error; 145 int error;
158 146
159 error = acl_get(GFS2_I(inode), ACL_ACCESS, &acl, NULL, NULL, NULL); 147 error = acl_get(GFS2_I(inode), GFS2_POSIX_ACL_ACCESS, &acl, &el, NULL, NULL);
148 brelse(el.el_bh);
160 if (error) 149 if (error)
161 return error; 150 return error;
162 151
@@ -196,10 +185,12 @@ static int munge_mode(struct gfs2_inode *ip, mode_t mode)
196 185
197int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip) 186int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
198{ 187{
188 struct gfs2_ea_location el;
199 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 189 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
200 struct posix_acl *acl = NULL, *clone; 190 struct posix_acl *acl = NULL, *clone;
201 struct gfs2_ea_request er;
202 mode_t mode = ip->i_inode.i_mode; 191 mode_t mode = ip->i_inode.i_mode;
192 char *data = NULL;
193 unsigned int len;
203 int error; 194 int error;
204 195
205 if (!sdp->sd_args.ar_posix_acl) 196 if (!sdp->sd_args.ar_posix_acl)
@@ -207,11 +198,8 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
207 if (S_ISLNK(ip->i_inode.i_mode)) 198 if (S_ISLNK(ip->i_inode.i_mode))
208 return 0; 199 return 0;
209 200
210 memset(&er, 0, sizeof(struct gfs2_ea_request)); 201 error = acl_get(dip, GFS2_POSIX_ACL_DEFAULT, &acl, &el, &data, &len);
211 er.er_type = GFS2_EATYPE_SYS; 202 brelse(el.el_bh);
212
213 error = acl_get(dip, ACL_DEFAULT, &acl, NULL,
214 &er.er_data, &er.er_data_len);
215 if (error) 203 if (error)
216 return error; 204 return error;
217 if (!acl) { 205 if (!acl) {
@@ -229,9 +217,8 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
229 acl = clone; 217 acl = clone;
230 218
231 if (S_ISDIR(ip->i_inode.i_mode)) { 219 if (S_ISDIR(ip->i_inode.i_mode)) {
232 er.er_name = GFS2_POSIX_ACL_DEFAULT; 220 error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS,
233 er.er_name_len = GFS2_POSIX_ACL_DEFAULT_LEN; 221 GFS2_POSIX_ACL_DEFAULT, data, len, 0);
234 error = gfs2_system_eaops.eo_set(ip, &er);
235 if (error) 222 if (error)
236 goto out; 223 goto out;
237 } 224 }
@@ -239,21 +226,19 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct gfs2_inode *ip)
239 error = posix_acl_create_masq(acl, &mode); 226 error = posix_acl_create_masq(acl, &mode);
240 if (error < 0) 227 if (error < 0)
241 goto out; 228 goto out;
242 if (error > 0) { 229 if (error == 0)
243 er.er_name = GFS2_POSIX_ACL_ACCESS; 230 goto munge;
244 er.er_name_len = GFS2_POSIX_ACL_ACCESS_LEN;
245 posix_acl_to_xattr(acl, er.er_data, er.er_data_len);
246 er.er_mode = mode;
247 er.er_flags = GFS2_ERF_MODE;
248 error = gfs2_system_eaops.eo_set(ip, &er);
249 if (error)
250 goto out;
251 } else
252 munge_mode(ip, mode);
253 231
232 posix_acl_to_xattr(acl, data, len);
233 error = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SYS,
234 GFS2_POSIX_ACL_ACCESS, data, len, 0);
235 if (error)
236 goto out;
237munge:
238 error = munge_mode(ip, mode);
254out: 239out:
255 posix_acl_release(acl); 240 posix_acl_release(acl);
256 kfree(er.er_data); 241 kfree(data);
257 return error; 242 return error;
258} 243}
259 244
@@ -265,9 +250,9 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
265 unsigned int len; 250 unsigned int len;
266 int error; 251 int error;
267 252
268 error = acl_get(ip, ACL_ACCESS, &acl, &el, &data, &len); 253 error = acl_get(ip, GFS2_POSIX_ACL_ACCESS, &acl, &el, &data, &len);
269 if (error) 254 if (error)
270 return error; 255 goto out_brelse;
271 if (!acl) 256 if (!acl)
272 return gfs2_setattr_simple(ip, attr); 257 return gfs2_setattr_simple(ip, attr);
273 258
@@ -286,8 +271,9 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
286 271
287out: 272out:
288 posix_acl_release(acl); 273 posix_acl_release(acl);
289 brelse(el.el_bh);
290 kfree(data); 274 kfree(data);
275out_brelse:
276 brelse(el.el_bh);
291 return error; 277 return error;
292} 278}
293 279
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c
index 022c66cd5606..91beddadd388 100644
--- a/fs/gfs2/dentry.c
+++ b/fs/gfs2/dentry.c
@@ -107,8 +107,26 @@ static int gfs2_dhash(struct dentry *dentry, struct qstr *str)
107 return 0; 107 return 0;
108} 108}
109 109
110static int gfs2_dentry_delete(struct dentry *dentry)
111{
112 struct gfs2_inode *ginode;
113
114 if (!dentry->d_inode)
115 return 0;
116
117 ginode = GFS2_I(dentry->d_inode);
118 if (!ginode->i_iopen_gh.gh_gl)
119 return 0;
120
121 if (test_bit(GLF_DEMOTE, &ginode->i_iopen_gh.gh_gl->gl_flags))
122 return 1;
123
124 return 0;
125}
126
110const struct dentry_operations gfs2_dops = { 127const struct dentry_operations gfs2_dops = {
111 .d_revalidate = gfs2_drevalidate, 128 .d_revalidate = gfs2_drevalidate,
112 .d_hash = gfs2_dhash, 129 .d_hash = gfs2_dhash,
130 .d_delete = gfs2_dentry_delete,
113}; 131};
114 132
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
deleted file mode 100644
index dee9b03e5b37..000000000000
--- a/fs/gfs2/eaops.c
+++ /dev/null
@@ -1,157 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#include <linux/slab.h>
11#include <linux/spinlock.h>
12#include <linux/completion.h>
13#include <linux/buffer_head.h>
14#include <linux/capability.h>
15#include <linux/xattr.h>
16#include <linux/gfs2_ondisk.h>
17#include <asm/uaccess.h>
18
19#include "gfs2.h"
20#include "incore.h"
21#include "acl.h"
22#include "eaops.h"
23#include "eattr.h"
24#include "util.h"
25
26/**
27 * gfs2_ea_name2type - get the type of the ea, and truncate type from the name
28 * @namep: ea name, possibly with type appended
29 *
30 * Returns: GFS2_EATYPE_XXX
31 */
32
33unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name)
34{
35 unsigned int type;
36
37 if (strncmp(name, "system.", 7) == 0) {
38 type = GFS2_EATYPE_SYS;
39 if (truncated_name)
40 *truncated_name = name + sizeof("system.") - 1;
41 } else if (strncmp(name, "user.", 5) == 0) {
42 type = GFS2_EATYPE_USR;
43 if (truncated_name)
44 *truncated_name = name + sizeof("user.") - 1;
45 } else if (strncmp(name, "security.", 9) == 0) {
46 type = GFS2_EATYPE_SECURITY;
47 if (truncated_name)
48 *truncated_name = name + sizeof("security.") - 1;
49 } else {
50 type = GFS2_EATYPE_UNUSED;
51 if (truncated_name)
52 *truncated_name = NULL;
53 }
54
55 return type;
56}
57
58static int system_eo_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
59{
60 if (!GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) &&
61 !GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len) &&
62 !capable(CAP_SYS_ADMIN))
63 return -EPERM;
64
65 if (GFS2_SB(&ip->i_inode)->sd_args.ar_posix_acl == 0 &&
66 (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len) ||
67 GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)))
68 return -EOPNOTSUPP;
69
70 return gfs2_ea_get_i(ip, er);
71}
72
73static int system_eo_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
74{
75 int remove = 0;
76 int error;
77
78 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
79 if (!(er->er_flags & GFS2_ERF_MODE)) {
80 er->er_mode = ip->i_inode.i_mode;
81 er->er_flags |= GFS2_ERF_MODE;
82 }
83 error = gfs2_acl_validate_set(ip, 1, er,
84 &remove, &er->er_mode);
85 if (error)
86 return error;
87 error = gfs2_ea_set_i(ip, er);
88 if (error)
89 return error;
90 if (remove)
91 gfs2_ea_remove_i(ip, er);
92 return 0;
93
94 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
95 error = gfs2_acl_validate_set(ip, 0, er,
96 &remove, NULL);
97 if (error)
98 return error;
99 if (!remove)
100 error = gfs2_ea_set_i(ip, er);
101 else {
102 error = gfs2_ea_remove_i(ip, er);
103 if (error == -ENODATA)
104 error = 0;
105 }
106 return error;
107 }
108
109 return -EPERM;
110}
111
112static int system_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er)
113{
114 if (GFS2_ACL_IS_ACCESS(er->er_name, er->er_name_len)) {
115 int error = gfs2_acl_validate_remove(ip, 1);
116 if (error)
117 return error;
118
119 } else if (GFS2_ACL_IS_DEFAULT(er->er_name, er->er_name_len)) {
120 int error = gfs2_acl_validate_remove(ip, 0);
121 if (error)
122 return error;
123
124 } else
125 return -EPERM;
126
127 return gfs2_ea_remove_i(ip, er);
128}
129
130static const struct gfs2_eattr_operations gfs2_user_eaops = {
131 .eo_get = gfs2_ea_get_i,
132 .eo_set = gfs2_ea_set_i,
133 .eo_remove = gfs2_ea_remove_i,
134 .eo_name = "user",
135};
136
137const struct gfs2_eattr_operations gfs2_system_eaops = {
138 .eo_get = system_eo_get,
139 .eo_set = system_eo_set,
140 .eo_remove = system_eo_remove,
141 .eo_name = "system",
142};
143
144static const struct gfs2_eattr_operations gfs2_security_eaops = {
145 .eo_get = gfs2_ea_get_i,
146 .eo_set = gfs2_ea_set_i,
147 .eo_remove = gfs2_ea_remove_i,
148 .eo_name = "security",
149};
150
151const struct gfs2_eattr_operations *gfs2_ea_ops[] = {
152 NULL,
153 &gfs2_user_eaops,
154 &gfs2_system_eaops,
155 &gfs2_security_eaops,
156};
157
diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h
deleted file mode 100644
index da2f7fbbb40d..000000000000
--- a/fs/gfs2/eaops.h
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
4 *
5 * This copyrighted material is made available to anyone wishing to use,
6 * modify, copy, or redistribute it subject to the terms and conditions
7 * of the GNU General Public License version 2.
8 */
9
10#ifndef __EAOPS_DOT_H__
11#define __EAOPS_DOT_H__
12
13struct gfs2_ea_request;
14struct gfs2_inode;
15
16struct gfs2_eattr_operations {
17 int (*eo_get) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
18 int (*eo_set) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
19 int (*eo_remove) (struct gfs2_inode *ip, struct gfs2_ea_request *er);
20 char *eo_name;
21};
22
23unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name);
24
25extern const struct gfs2_eattr_operations gfs2_system_eaops;
26
27extern const struct gfs2_eattr_operations *gfs2_ea_ops[];
28
29#endif /* __EAOPS_DOT_H__ */
30
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 9200ef221716..d15876e9aa26 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -143,17 +143,14 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
143} 143}
144 144
145static struct dentry *gfs2_get_dentry(struct super_block *sb, 145static struct dentry *gfs2_get_dentry(struct super_block *sb,
146 struct gfs2_inum_host *inum) 146 struct gfs2_inum_host *inum)
147{ 147{
148 struct gfs2_sbd *sdp = sb->s_fs_info; 148 struct gfs2_sbd *sdp = sb->s_fs_info;
149 struct gfs2_holder i_gh, ri_gh, rgd_gh; 149 struct gfs2_holder i_gh;
150 struct gfs2_rgrpd *rgd;
151 struct inode *inode; 150 struct inode *inode;
152 struct dentry *dentry; 151 struct dentry *dentry;
153 int error; 152 int error;
154 153
155 /* System files? */
156
157 inode = gfs2_ilookup(sb, inum->no_addr); 154 inode = gfs2_ilookup(sb, inum->no_addr);
158 if (inode) { 155 if (inode) {
159 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) { 156 if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
@@ -168,29 +165,11 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
168 if (error) 165 if (error)
169 return ERR_PTR(error); 166 return ERR_PTR(error);
170 167
171 error = gfs2_rindex_hold(sdp, &ri_gh); 168 error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE);
172 if (error) 169 if (error)
173 goto fail; 170 goto fail;
174 171
175 error = -EINVAL; 172 inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0, 0);
176 rgd = gfs2_blk2rgrpd(sdp, inum->no_addr);
177 if (!rgd)
178 goto fail_rindex;
179
180 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
181 if (error)
182 goto fail_rindex;
183
184 error = -ESTALE;
185 if (gfs2_get_block_type(rgd, inum->no_addr) != GFS2_BLKST_DINODE)
186 goto fail_rgd;
187
188 gfs2_glock_dq_uninit(&rgd_gh);
189 gfs2_glock_dq_uninit(&ri_gh);
190
191 inode = gfs2_inode_lookup(sb, DT_UNKNOWN,
192 inum->no_addr,
193 0, 0);
194 if (IS_ERR(inode)) { 173 if (IS_ERR(inode)) {
195 error = PTR_ERR(inode); 174 error = PTR_ERR(inode);
196 goto fail; 175 goto fail;
@@ -224,13 +203,6 @@ out_inode:
224 if (!IS_ERR(dentry)) 203 if (!IS_ERR(dentry))
225 dentry->d_op = &gfs2_dops; 204 dentry->d_op = &gfs2_dops;
226 return dentry; 205 return dentry;
227
228fail_rgd:
229 gfs2_glock_dq_uninit(&rgd_gh);
230
231fail_rindex:
232 gfs2_glock_dq_uninit(&ri_gh);
233
234fail: 206fail:
235 gfs2_glock_dq_uninit(&i_gh); 207 gfs2_glock_dq_uninit(&i_gh);
236 return ERR_PTR(error); 208 return ERR_PTR(error);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 73318a3ce6f1..166f38fbd246 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -38,7 +38,6 @@
38#include "rgrp.h" 38#include "rgrp.h"
39#include "trans.h" 39#include "trans.h"
40#include "util.h" 40#include "util.h"
41#include "eaops.h"
42 41
43/** 42/**
44 * gfs2_llseek - seek to a location in a file 43 * gfs2_llseek - seek to a location in a file
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 61801ada36f0..6edb423f90b3 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -406,6 +406,12 @@ struct gfs2_statfs_change_host {
406#define GFS2_DATA_WRITEBACK 1 406#define GFS2_DATA_WRITEBACK 1
407#define GFS2_DATA_ORDERED 2 407#define GFS2_DATA_ORDERED 2
408 408
409#define GFS2_ERRORS_DEFAULT GFS2_ERRORS_WITHDRAW
410#define GFS2_ERRORS_WITHDRAW 0
411#define GFS2_ERRORS_CONTINUE 1 /* place holder for future feature */
412#define GFS2_ERRORS_RO 2 /* place holder for future feature */
413#define GFS2_ERRORS_PANIC 3
414
409struct gfs2_args { 415struct gfs2_args {
410 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ 416 char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */
411 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ 417 char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */
@@ -422,6 +428,7 @@ struct gfs2_args {
422 unsigned int ar_data:2; /* ordered/writeback */ 428 unsigned int ar_data:2; /* ordered/writeback */
423 unsigned int ar_meta:1; /* mount metafs */ 429 unsigned int ar_meta:1; /* mount metafs */
424 unsigned int ar_discard:1; /* discard requests */ 430 unsigned int ar_discard:1; /* discard requests */
431 unsigned int ar_errors:2; /* errors=withdraw | panic */
425 int ar_commit; /* Commit interval */ 432 int ar_commit; /* Commit interval */
426}; 433};
427 434
@@ -489,7 +496,6 @@ struct gfs2_sb_host {
489 */ 496 */
490 497
491struct lm_lockstruct { 498struct lm_lockstruct {
492 u32 ls_id;
493 unsigned int ls_jid; 499 unsigned int ls_jid;
494 unsigned int ls_first; 500 unsigned int ls_first;
495 unsigned int ls_first_done; 501 unsigned int ls_first_done;
@@ -541,18 +547,12 @@ struct gfs2_sbd {
541 struct dentry *sd_root_dir; 547 struct dentry *sd_root_dir;
542 548
543 struct inode *sd_jindex; 549 struct inode *sd_jindex;
544 struct inode *sd_inum_inode;
545 struct inode *sd_statfs_inode; 550 struct inode *sd_statfs_inode;
546 struct inode *sd_ir_inode;
547 struct inode *sd_sc_inode; 551 struct inode *sd_sc_inode;
548 struct inode *sd_qc_inode; 552 struct inode *sd_qc_inode;
549 struct inode *sd_rindex; 553 struct inode *sd_rindex;
550 struct inode *sd_quota_inode; 554 struct inode *sd_quota_inode;
551 555
552 /* Inum stuff */
553
554 struct mutex sd_inum_mutex;
555
556 /* StatFS stuff */ 556 /* StatFS stuff */
557 557
558 spinlock_t sd_statfs_spin; 558 spinlock_t sd_statfs_spin;
@@ -580,7 +580,6 @@ struct gfs2_sbd {
580 struct gfs2_holder sd_journal_gh; 580 struct gfs2_holder sd_journal_gh;
581 struct gfs2_holder sd_jinode_gh; 581 struct gfs2_holder sd_jinode_gh;
582 582
583 struct gfs2_holder sd_ir_gh;
584 struct gfs2_holder sd_sc_gh; 583 struct gfs2_holder sd_sc_gh;
585 struct gfs2_holder sd_qc_gh; 584 struct gfs2_holder sd_qc_gh;
586 585
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 2f94bd723698..fb15d3b1f409 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -24,7 +24,7 @@
24#include "acl.h" 24#include "acl.h"
25#include "bmap.h" 25#include "bmap.h"
26#include "dir.h" 26#include "dir.h"
27#include "eattr.h" 27#include "xattr.h"
28#include "glock.h" 28#include "glock.h"
29#include "glops.h" 29#include "glops.h"
30#include "inode.h" 30#include "inode.h"
@@ -519,139 +519,6 @@ out:
519 return inode ? inode : ERR_PTR(error); 519 return inode ? inode : ERR_PTR(error);
520} 520}
521 521
522static void gfs2_inum_range_in(struct gfs2_inum_range_host *ir, const void *buf)
523{
524 const struct gfs2_inum_range *str = buf;
525
526 ir->ir_start = be64_to_cpu(str->ir_start);
527 ir->ir_length = be64_to_cpu(str->ir_length);
528}
529
530static void gfs2_inum_range_out(const struct gfs2_inum_range_host *ir, void *buf)
531{
532 struct gfs2_inum_range *str = buf;
533
534 str->ir_start = cpu_to_be64(ir->ir_start);
535 str->ir_length = cpu_to_be64(ir->ir_length);
536}
537
538static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
539{
540 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
541 struct buffer_head *bh;
542 struct gfs2_inum_range_host ir;
543 int error;
544
545 error = gfs2_trans_begin(sdp, RES_DINODE, 0);
546 if (error)
547 return error;
548 mutex_lock(&sdp->sd_inum_mutex);
549
550 error = gfs2_meta_inode_buffer(ip, &bh);
551 if (error) {
552 mutex_unlock(&sdp->sd_inum_mutex);
553 gfs2_trans_end(sdp);
554 return error;
555 }
556
557 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
558
559 if (ir.ir_length) {
560 *formal_ino = ir.ir_start++;
561 ir.ir_length--;
562 gfs2_trans_add_bh(ip->i_gl, bh, 1);
563 gfs2_inum_range_out(&ir,
564 bh->b_data + sizeof(struct gfs2_dinode));
565 brelse(bh);
566 mutex_unlock(&sdp->sd_inum_mutex);
567 gfs2_trans_end(sdp);
568 return 0;
569 }
570
571 brelse(bh);
572
573 mutex_unlock(&sdp->sd_inum_mutex);
574 gfs2_trans_end(sdp);
575
576 return 1;
577}
578
579static int pick_formal_ino_2(struct gfs2_sbd *sdp, u64 *formal_ino)
580{
581 struct gfs2_inode *ip = GFS2_I(sdp->sd_ir_inode);
582 struct gfs2_inode *m_ip = GFS2_I(sdp->sd_inum_inode);
583 struct gfs2_holder gh;
584 struct buffer_head *bh;
585 struct gfs2_inum_range_host ir;
586 int error;
587
588 error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
589 if (error)
590 return error;
591
592 error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
593 if (error)
594 goto out;
595 mutex_lock(&sdp->sd_inum_mutex);
596
597 error = gfs2_meta_inode_buffer(ip, &bh);
598 if (error)
599 goto out_end_trans;
600
601 gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
602
603 if (!ir.ir_length) {
604 struct buffer_head *m_bh;
605 u64 x, y;
606 __be64 z;
607
608 error = gfs2_meta_inode_buffer(m_ip, &m_bh);
609 if (error)
610 goto out_brelse;
611
612 z = *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode));
613 x = y = be64_to_cpu(z);
614 ir.ir_start = x;
615 ir.ir_length = GFS2_INUM_QUANTUM;
616 x += GFS2_INUM_QUANTUM;
617 if (x < y)
618 gfs2_consist_inode(m_ip);
619 z = cpu_to_be64(x);
620 gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
621 *(__be64 *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = z;
622
623 brelse(m_bh);
624 }
625
626 *formal_ino = ir.ir_start++;
627 ir.ir_length--;
628
629 gfs2_trans_add_bh(ip->i_gl, bh, 1);
630 gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));
631
632out_brelse:
633 brelse(bh);
634out_end_trans:
635 mutex_unlock(&sdp->sd_inum_mutex);
636 gfs2_trans_end(sdp);
637out:
638 gfs2_glock_dq_uninit(&gh);
639 return error;
640}
641
642static int pick_formal_ino(struct gfs2_sbd *sdp, u64 *inum)
643{
644 int error;
645
646 error = pick_formal_ino_1(sdp, inum);
647 if (error <= 0)
648 return error;
649
650 error = pick_formal_ino_2(sdp, inum);
651
652 return error;
653}
654
655/** 522/**
656 * create_ok - OK to create a new on-disk inode here? 523 * create_ok - OK to create a new on-disk inode here?
657 * @dip: Directory in which dinode is to be created 524 * @dip: Directory in which dinode is to be created
@@ -731,7 +598,7 @@ static int alloc_dinode(struct gfs2_inode *dip, u64 *no_addr, u64 *generation)
731 if (error) 598 if (error)
732 goto out_ipreserv; 599 goto out_ipreserv;
733 600
734 *no_addr = gfs2_alloc_di(dip, generation); 601 error = gfs2_alloc_di(dip, no_addr, generation);
735 602
736 gfs2_trans_end(sdp); 603 gfs2_trans_end(sdp);
737 604
@@ -924,7 +791,6 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
924 size_t len; 791 size_t len;
925 void *value; 792 void *value;
926 char *name; 793 char *name;
927 struct gfs2_ea_request er;
928 794
929 err = security_inode_init_security(&ip->i_inode, &dip->i_inode, 795 err = security_inode_init_security(&ip->i_inode, &dip->i_inode,
930 &name, &value, &len); 796 &name, &value, &len);
@@ -935,16 +801,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
935 return err; 801 return err;
936 } 802 }
937 803
938 memset(&er, 0, sizeof(struct gfs2_ea_request)); 804 err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0);
939
940 er.er_type = GFS2_EATYPE_SECURITY;
941 er.er_name = name;
942 er.er_data = value;
943 er.er_name_len = strlen(name);
944 er.er_data_len = len;
945
946 err = gfs2_ea_set_i(ip, &er);
947
948 kfree(value); 805 kfree(value);
949 kfree(name); 806 kfree(name);
950 807
@@ -991,13 +848,10 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
991 if (error) 848 if (error)
992 goto fail_gunlock; 849 goto fail_gunlock;
993 850
994 error = pick_formal_ino(sdp, &inum.no_formal_ino);
995 if (error)
996 goto fail_gunlock;
997
998 error = alloc_dinode(dip, &inum.no_addr, &generation); 851 error = alloc_dinode(dip, &inum.no_addr, &generation);
999 if (error) 852 if (error)
1000 goto fail_gunlock; 853 goto fail_gunlock;
854 inum.no_formal_ino = generation;
1001 855
1002 error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops, 856 error = gfs2_glock_nq_num(sdp, inum.no_addr, &gfs2_inode_glops,
1003 LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1); 857 LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
@@ -1008,9 +862,8 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
1008 if (error) 862 if (error)
1009 goto fail_gunlock2; 863 goto fail_gunlock2;
1010 864
1011 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), 865 inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
1012 inum.no_addr, 866 inum.no_formal_ino, 0);
1013 inum.no_formal_ino, 0);
1014 if (IS_ERR(inode)) 867 if (IS_ERR(inode))
1015 goto fail_gunlock2; 868 goto fail_gunlock2;
1016 869
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 7bc3c45cd676..52fb6c048981 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -84,7 +84,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
84 84
85 gfs2_tune_init(&sdp->sd_tune); 85 gfs2_tune_init(&sdp->sd_tune);
86 86
87 mutex_init(&sdp->sd_inum_mutex);
88 spin_lock_init(&sdp->sd_statfs_spin); 87 spin_lock_init(&sdp->sd_statfs_spin);
89 88
90 spin_lock_init(&sdp->sd_rindex_spin); 89 spin_lock_init(&sdp->sd_rindex_spin);
@@ -833,21 +832,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
833 if (error) 832 if (error)
834 goto fail; 833 goto fail;
835 834
836 /* Read in the master inode number inode */
837 sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum");
838 if (IS_ERR(sdp->sd_inum_inode)) {
839 error = PTR_ERR(sdp->sd_inum_inode);
840 fs_err(sdp, "can't read in inum inode: %d\n", error);
841 goto fail_journal;
842 }
843
844
845 /* Read in the master statfs inode */ 835 /* Read in the master statfs inode */
846 sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); 836 sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs");
847 if (IS_ERR(sdp->sd_statfs_inode)) { 837 if (IS_ERR(sdp->sd_statfs_inode)) {
848 error = PTR_ERR(sdp->sd_statfs_inode); 838 error = PTR_ERR(sdp->sd_statfs_inode);
849 fs_err(sdp, "can't read in statfs inode: %d\n", error); 839 fs_err(sdp, "can't read in statfs inode: %d\n", error);
850 goto fail_inum; 840 goto fail_journal;
851 } 841 }
852 842
853 /* Read in the resource index inode */ 843 /* Read in the resource index inode */
@@ -876,8 +866,6 @@ fail_rindex:
876 iput(sdp->sd_rindex); 866 iput(sdp->sd_rindex);
877fail_statfs: 867fail_statfs:
878 iput(sdp->sd_statfs_inode); 868 iput(sdp->sd_statfs_inode);
879fail_inum:
880 iput(sdp->sd_inum_inode);
881fail_journal: 869fail_journal:
882 init_journal(sdp, UNDO); 870 init_journal(sdp, UNDO);
883fail: 871fail:
@@ -905,20 +893,12 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
905 return error; 893 return error;
906 } 894 }
907 895
908 sprintf(buf, "inum_range%u", sdp->sd_jdesc->jd_jid);
909 sdp->sd_ir_inode = gfs2_lookup_simple(pn, buf);
910 if (IS_ERR(sdp->sd_ir_inode)) {
911 error = PTR_ERR(sdp->sd_ir_inode);
912 fs_err(sdp, "can't find local \"ir\" file: %d\n", error);
913 goto fail;
914 }
915
916 sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid); 896 sprintf(buf, "statfs_change%u", sdp->sd_jdesc->jd_jid);
917 sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf); 897 sdp->sd_sc_inode = gfs2_lookup_simple(pn, buf);
918 if (IS_ERR(sdp->sd_sc_inode)) { 898 if (IS_ERR(sdp->sd_sc_inode)) {
919 error = PTR_ERR(sdp->sd_sc_inode); 899 error = PTR_ERR(sdp->sd_sc_inode);
920 fs_err(sdp, "can't find local \"sc\" file: %d\n", error); 900 fs_err(sdp, "can't find local \"sc\" file: %d\n", error);
921 goto fail_ir_i; 901 goto fail;
922 } 902 }
923 903
924 sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid); 904 sprintf(buf, "quota_change%u", sdp->sd_jdesc->jd_jid);
@@ -932,27 +912,16 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo)
932 iput(pn); 912 iput(pn);
933 pn = NULL; 913 pn = NULL;
934 914
935 ip = GFS2_I(sdp->sd_ir_inode);
936 error = gfs2_glock_nq_init(ip->i_gl,
937 LM_ST_EXCLUSIVE, 0,
938 &sdp->sd_ir_gh);
939 if (error) {
940 fs_err(sdp, "can't lock local \"ir\" file: %d\n", error);
941 goto fail_qc_i;
942 }
943
944 ip = GFS2_I(sdp->sd_sc_inode); 915 ip = GFS2_I(sdp->sd_sc_inode);
945 error = gfs2_glock_nq_init(ip->i_gl, 916 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
946 LM_ST_EXCLUSIVE, 0,
947 &sdp->sd_sc_gh); 917 &sdp->sd_sc_gh);
948 if (error) { 918 if (error) {
949 fs_err(sdp, "can't lock local \"sc\" file: %d\n", error); 919 fs_err(sdp, "can't lock local \"sc\" file: %d\n", error);
950 goto fail_ir_gh; 920 goto fail_qc_i;
951 } 921 }
952 922
953 ip = GFS2_I(sdp->sd_qc_inode); 923 ip = GFS2_I(sdp->sd_qc_inode);
954 error = gfs2_glock_nq_init(ip->i_gl, 924 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0,
955 LM_ST_EXCLUSIVE, 0,
956 &sdp->sd_qc_gh); 925 &sdp->sd_qc_gh);
957 if (error) { 926 if (error) {
958 fs_err(sdp, "can't lock local \"qc\" file: %d\n", error); 927 fs_err(sdp, "can't lock local \"qc\" file: %d\n", error);
@@ -965,14 +934,10 @@ fail_qc_gh:
965 gfs2_glock_dq_uninit(&sdp->sd_qc_gh); 934 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
966fail_ut_gh: 935fail_ut_gh:
967 gfs2_glock_dq_uninit(&sdp->sd_sc_gh); 936 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
968fail_ir_gh:
969 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
970fail_qc_i: 937fail_qc_i:
971 iput(sdp->sd_qc_inode); 938 iput(sdp->sd_qc_inode);
972fail_ut_i: 939fail_ut_i:
973 iput(sdp->sd_sc_inode); 940 iput(sdp->sd_sc_inode);
974fail_ir_i:
975 iput(sdp->sd_ir_inode);
976fail: 941fail:
977 if (pn) 942 if (pn)
978 iput(pn); 943 iput(pn);
@@ -1063,7 +1028,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
1063 1028
1064 ls->ls_ops = lm; 1029 ls->ls_ops = lm;
1065 ls->ls_first = 1; 1030 ls->ls_first = 1;
1066 ls->ls_id = 0;
1067 1031
1068 for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) { 1032 for (options = args->ar_hostdata; (o = strsep(&options, ":")); ) {
1069 substring_t tmp[MAX_OPT_ARGS]; 1033 substring_t tmp[MAX_OPT_ARGS];
@@ -1081,10 +1045,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent)
1081 ls->ls_jid = option; 1045 ls->ls_jid = option;
1082 break; 1046 break;
1083 case Opt_id: 1047 case Opt_id:
1084 ret = match_int(&tmp[0], &option); 1048 /* Obsolete, but left for backward compat purposes */
1085 if (ret)
1086 goto hostdata_error;
1087 ls->ls_id = option;
1088 break; 1049 break;
1089 case Opt_first: 1050 case Opt_first:
1090 ret = match_int(&tmp[0], &option); 1051 ret = match_int(&tmp[0], &option);
@@ -1133,6 +1094,17 @@ void gfs2_lm_unmount(struct gfs2_sbd *sdp)
1133 lm->lm_unmount(sdp); 1094 lm->lm_unmount(sdp);
1134} 1095}
1135 1096
1097void gfs2_online_uevent(struct gfs2_sbd *sdp)
1098{
1099 struct super_block *sb = sdp->sd_vfs;
1100 char ro[20];
1101 char spectator[20];
1102 char *envp[] = { ro, spectator, NULL };
1103 sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
1104 sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
1105 kobject_uevent_env(&sdp->sd_kobj, KOBJ_ONLINE, envp);
1106}
1107
1136/** 1108/**
1137 * fill_super - Read in superblock 1109 * fill_super - Read in superblock
1138 * @sb: The VFS superblock 1110 * @sb: The VFS superblock
@@ -1157,6 +1129,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1157 sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; 1129 sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT;
1158 sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; 1130 sdp->sd_args.ar_data = GFS2_DATA_DEFAULT;
1159 sdp->sd_args.ar_commit = 60; 1131 sdp->sd_args.ar_commit = 60;
1132 sdp->sd_args.ar_errors = GFS2_ERRORS_DEFAULT;
1160 1133
1161 error = gfs2_mount_args(sdp, &sdp->sd_args, data); 1134 error = gfs2_mount_args(sdp, &sdp->sd_args, data);
1162 if (error) { 1135 if (error) {
@@ -1174,6 +1147,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1174 sb->s_magic = GFS2_MAGIC; 1147 sb->s_magic = GFS2_MAGIC;
1175 sb->s_op = &gfs2_super_ops; 1148 sb->s_op = &gfs2_super_ops;
1176 sb->s_export_op = &gfs2_export_ops; 1149 sb->s_export_op = &gfs2_export_ops;
1150 sb->s_xattr = gfs2_xattr_handlers;
1177 sb->s_time_gran = 1; 1151 sb->s_time_gran = 1;
1178 sb->s_maxbytes = MAX_LFS_FILESIZE; 1152 sb->s_maxbytes = MAX_LFS_FILESIZE;
1179 1153
@@ -1236,7 +1210,7 @@ static int fill_super(struct super_block *sb, void *data, int silent)
1236 } 1210 }
1237 1211
1238 gfs2_glock_dq_uninit(&mount_gh); 1212 gfs2_glock_dq_uninit(&mount_gh);
1239 1213 gfs2_online_uevent(sdp);
1240 return 0; 1214 return 0;
1241 1215
1242fail_threads: 1216fail_threads:
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index f8bd20baf99c..c3ac18054057 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -26,8 +26,7 @@
26#include "acl.h" 26#include "acl.h"
27#include "bmap.h" 27#include "bmap.h"
28#include "dir.h" 28#include "dir.h"
29#include "eaops.h" 29#include "xattr.h"
30#include "eattr.h"
31#include "glock.h" 30#include "glock.h"
32#include "inode.h" 31#include "inode.h"
33#include "meta_io.h" 32#include "meta_io.h"
@@ -349,7 +348,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
349 348
350 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); 349 error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0);
351 if (error) 350 if (error)
352 goto out_rgrp; 351 goto out_gunlock;
353 352
354 error = gfs2_dir_del(dip, &dentry->d_name); 353 error = gfs2_dir_del(dip, &dentry->d_name);
355 if (error) 354 if (error)
@@ -1302,60 +1301,53 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name,
1302 const void *data, size_t size, int flags) 1301 const void *data, size_t size, int flags)
1303{ 1302{
1304 struct inode *inode = dentry->d_inode; 1303 struct inode *inode = dentry->d_inode;
1305 struct gfs2_ea_request er; 1304 struct gfs2_inode *ip = GFS2_I(inode);
1306 1305 struct gfs2_holder gh;
1307 memset(&er, 0, sizeof(struct gfs2_ea_request)); 1306 int ret;
1308 er.er_type = gfs2_ea_name2type(name, &er.er_name);
1309 if (er.er_type == GFS2_EATYPE_UNUSED)
1310 return -EOPNOTSUPP;
1311 er.er_data = (char *)data;
1312 er.er_name_len = strlen(er.er_name);
1313 er.er_data_len = size;
1314 er.er_flags = flags;
1315
1316 gfs2_assert_warn(GFS2_SB(inode), !(er.er_flags & GFS2_ERF_MODE));
1317 1307
1318 return gfs2_ea_set(GFS2_I(inode), &er); 1308 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1309 ret = gfs2_glock_nq(&gh);
1310 if (ret == 0) {
1311 ret = generic_setxattr(dentry, name, data, size, flags);
1312 gfs2_glock_dq(&gh);
1313 }
1314 gfs2_holder_uninit(&gh);
1315 return ret;
1319} 1316}
1320 1317
1321static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, 1318static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
1322 void *data, size_t size) 1319 void *data, size_t size)
1323{ 1320{
1324 struct gfs2_ea_request er; 1321 struct inode *inode = dentry->d_inode;
1325 1322 struct gfs2_inode *ip = GFS2_I(inode);
1326 memset(&er, 0, sizeof(struct gfs2_ea_request)); 1323 struct gfs2_holder gh;
1327 er.er_type = gfs2_ea_name2type(name, &er.er_name); 1324 int ret;
1328 if (er.er_type == GFS2_EATYPE_UNUSED)
1329 return -EOPNOTSUPP;
1330 er.er_data = data;
1331 er.er_name_len = strlen(er.er_name);
1332 er.er_data_len = size;
1333
1334 return gfs2_ea_get(GFS2_I(dentry->d_inode), &er);
1335}
1336
1337static ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
1338{
1339 struct gfs2_ea_request er;
1340
1341 memset(&er, 0, sizeof(struct gfs2_ea_request));
1342 er.er_data = (size) ? buffer : NULL;
1343 er.er_data_len = size;
1344 1325
1345 return gfs2_ea_list(GFS2_I(dentry->d_inode), &er); 1326 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
1327 ret = gfs2_glock_nq(&gh);
1328 if (ret == 0) {
1329 ret = generic_getxattr(dentry, name, data, size);
1330 gfs2_glock_dq(&gh);
1331 }
1332 gfs2_holder_uninit(&gh);
1333 return ret;
1346} 1334}
1347 1335
1348static int gfs2_removexattr(struct dentry *dentry, const char *name) 1336static int gfs2_removexattr(struct dentry *dentry, const char *name)
1349{ 1337{
1350 struct gfs2_ea_request er; 1338 struct inode *inode = dentry->d_inode;
1351 1339 struct gfs2_inode *ip = GFS2_I(inode);
1352 memset(&er, 0, sizeof(struct gfs2_ea_request)); 1340 struct gfs2_holder gh;
1353 er.er_type = gfs2_ea_name2type(name, &er.er_name); 1341 int ret;
1354 if (er.er_type == GFS2_EATYPE_UNUSED)
1355 return -EOPNOTSUPP;
1356 er.er_name_len = strlen(er.er_name);
1357 1342
1358 return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er); 1343 gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
1344 ret = gfs2_glock_nq(&gh);
1345 if (ret == 0) {
1346 ret = generic_removexattr(dentry, name);
1347 gfs2_glock_dq(&gh);
1348 }
1349 gfs2_holder_uninit(&gh);
1350 return ret;
1359} 1351}
1360 1352
1361static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1353static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index fba795798d3a..28c590b7c9da 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -857,7 +857,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
857 goto start_new_extent; 857 goto start_new_extent;
858 if ((start + nr_sects) != blk) { 858 if ((start + nr_sects) != blk) {
859 rv = blkdev_issue_discard(bdev, start, 859 rv = blkdev_issue_discard(bdev, start,
860 nr_sects, GFP_NOFS); 860 nr_sects, GFP_NOFS,
861 DISCARD_FL_BARRIER);
861 if (rv) 862 if (rv)
862 goto fail; 863 goto fail;
863 nr_sects = 0; 864 nr_sects = 0;
@@ -871,7 +872,8 @@ start_new_extent:
871 } 872 }
872 } 873 }
873 if (nr_sects) { 874 if (nr_sects) {
874 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS); 875 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
876 DISCARD_FL_BARRIER);
875 if (rv) 877 if (rv)
876 goto fail; 878 goto fail;
877 } 879 }
@@ -1256,7 +1258,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
1256 * Returns: The block type (GFS2_BLKST_*) 1258 * Returns: The block type (GFS2_BLKST_*)
1257 */ 1259 */
1258 1260
1259unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block) 1261static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
1260{ 1262{
1261 struct gfs2_bitmap *bi = NULL; 1263 struct gfs2_bitmap *bi = NULL;
1262 u32 length, rgrp_block, buf_block; 1264 u32 length, rgrp_block, buf_block;
@@ -1459,6 +1461,16 @@ int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
1459 return 0; 1461 return 0;
1460} 1462}
1461 1463
1464static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
1465{
1466 struct gfs2_sbd *sdp = rgd->rd_sbd;
1467 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n",
1468 (unsigned long long)rgd->rd_addr);
1469 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
1470 gfs2_rgrp_dump(NULL, rgd->rd_gl);
1471 rgd->rd_flags |= GFS2_RDF_ERROR;
1472}
1473
1462/** 1474/**
1463 * gfs2_alloc_block - Allocate one or more blocks 1475 * gfs2_alloc_block - Allocate one or more blocks
1464 * @ip: the inode to allocate the block for 1476 * @ip: the inode to allocate the block for
@@ -1520,22 +1532,20 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
1520 return 0; 1532 return 0;
1521 1533
1522rgrp_error: 1534rgrp_error:
1523 fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", 1535 gfs2_rgrp_error(rgd);
1524 (unsigned long long)rgd->rd_addr);
1525 fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n");
1526 gfs2_rgrp_dump(NULL, rgd->rd_gl);
1527 rgd->rd_flags |= GFS2_RDF_ERROR;
1528 return -EIO; 1536 return -EIO;
1529} 1537}
1530 1538
1531/** 1539/**
1532 * gfs2_alloc_di - Allocate a dinode 1540 * gfs2_alloc_di - Allocate a dinode
1533 * @dip: the directory that the inode is going in 1541 * @dip: the directory that the inode is going in
1542 * @bn: the block number which is allocated
1543 * @generation: the generation number of the inode
1534 * 1544 *
1535 * Returns: the block allocated 1545 * Returns: 0 on success or error
1536 */ 1546 */
1537 1547
1538u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation) 1548int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
1539{ 1549{
1540 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); 1550 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1541 struct gfs2_alloc *al = dip->i_alloc; 1551 struct gfs2_alloc *al = dip->i_alloc;
@@ -1546,16 +1556,21 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1546 1556
1547 blk = rgblk_search(rgd, rgd->rd_last_alloc, 1557 blk = rgblk_search(rgd, rgd->rd_last_alloc,
1548 GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n); 1558 GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
1549 BUG_ON(blk == BFITNOENT);
1550 1559
1551 rgd->rd_last_alloc = blk; 1560 /* Since all blocks are reserved in advance, this shouldn't happen */
1561 if (blk == BFITNOENT)
1562 goto rgrp_error;
1552 1563
1564 rgd->rd_last_alloc = blk;
1553 block = rgd->rd_data0 + blk; 1565 block = rgd->rd_data0 + blk;
1566 if (rgd->rd_free == 0)
1567 goto rgrp_error;
1554 1568
1555 gfs2_assert_withdraw(sdp, rgd->rd_free);
1556 rgd->rd_free--; 1569 rgd->rd_free--;
1557 rgd->rd_dinodes++; 1570 rgd->rd_dinodes++;
1558 *generation = rgd->rd_igeneration++; 1571 *generation = rgd->rd_igeneration++;
1572 if (*generation == 0)
1573 *generation = rgd->rd_igeneration++;
1559 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); 1574 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1560 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); 1575 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1561 1576
@@ -1568,7 +1583,12 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
1568 rgd->rd_free_clone--; 1583 rgd->rd_free_clone--;
1569 spin_unlock(&sdp->sd_rindex_spin); 1584 spin_unlock(&sdp->sd_rindex_spin);
1570 trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); 1585 trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
1571 return block; 1586 *bn = block;
1587 return 0;
1588
1589rgrp_error:
1590 gfs2_rgrp_error(rgd);
1591 return -EIO;
1572} 1592}
1573 1593
1574/** 1594/**
@@ -1676,6 +1696,46 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
1676} 1696}
1677 1697
1678/** 1698/**
1699 * gfs2_check_blk_type - Check the type of a block
1700 * @sdp: The superblock
1701 * @no_addr: The block number to check
1702 * @type: The block type we are looking for
1703 *
1704 * Returns: 0 if the block type matches the expected type
1705 * -ESTALE if it doesn't match
1706 * or -ve errno if something went wrong while checking
1707 */
1708
1709int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
1710{
1711 struct gfs2_rgrpd *rgd;
1712 struct gfs2_holder ri_gh, rgd_gh;
1713 int error;
1714
1715 error = gfs2_rindex_hold(sdp, &ri_gh);
1716 if (error)
1717 goto fail;
1718
1719 error = -EINVAL;
1720 rgd = gfs2_blk2rgrpd(sdp, no_addr);
1721 if (!rgd)
1722 goto fail_rindex;
1723
1724 error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
1725 if (error)
1726 goto fail_rindex;
1727
1728 if (gfs2_get_block_type(rgd, no_addr) != type)
1729 error = -ESTALE;
1730
1731 gfs2_glock_dq_uninit(&rgd_gh);
1732fail_rindex:
1733 gfs2_glock_dq_uninit(&ri_gh);
1734fail:
1735 return error;
1736}
1737
1738/**
1679 * gfs2_rlist_add - add a RG to a list of RGs 1739 * gfs2_rlist_add - add a RG to a list of RGs
1680 * @sdp: the filesystem 1740 * @sdp: the filesystem
1681 * @rlist: the list of resource groups 1741 * @rlist: the list of resource groups
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index 1e76ff0f3e00..b4106ddaaa98 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -44,15 +44,15 @@ gfs2_inplace_reserve_i((ip), __FILE__, __LINE__)
44 44
45extern void gfs2_inplace_release(struct gfs2_inode *ip); 45extern void gfs2_inplace_release(struct gfs2_inode *ip);
46 46
47extern unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block);
48
49extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 47extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
50extern u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); 48extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
51 49
52extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); 50extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
53extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 51extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
54extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); 52extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
55extern void gfs2_unlink_di(struct inode *inode); 53extern void gfs2_unlink_di(struct inode *inode);
54extern int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr,
55 unsigned int type);
56 56
57struct gfs2_rgrp_list { 57struct gfs2_rgrp_list {
58 unsigned int rl_rgrps; 58 unsigned int rl_rgrps;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index f522bb017973..0ec3ec672de1 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -38,7 +38,7 @@
38#include "trans.h" 38#include "trans.h"
39#include "util.h" 39#include "util.h"
40#include "sys.h" 40#include "sys.h"
41#include "eattr.h" 41#include "xattr.h"
42 42
43#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x) 43#define args_neq(a1, a2, x) ((a1)->ar_##x != (a2)->ar_##x)
44 44
@@ -68,6 +68,8 @@ enum {
68 Opt_discard, 68 Opt_discard,
69 Opt_nodiscard, 69 Opt_nodiscard,
70 Opt_commit, 70 Opt_commit,
71 Opt_err_withdraw,
72 Opt_err_panic,
71 Opt_error, 73 Opt_error,
72}; 74};
73 75
@@ -97,6 +99,8 @@ static const match_table_t tokens = {
97 {Opt_discard, "discard"}, 99 {Opt_discard, "discard"},
98 {Opt_nodiscard, "nodiscard"}, 100 {Opt_nodiscard, "nodiscard"},
99 {Opt_commit, "commit=%d"}, 101 {Opt_commit, "commit=%d"},
102 {Opt_err_withdraw, "errors=withdraw"},
103 {Opt_err_panic, "errors=panic"},
100 {Opt_error, NULL} 104 {Opt_error, NULL}
101}; 105};
102 106
@@ -152,6 +156,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
152 args->ar_localcaching = 1; 156 args->ar_localcaching = 1;
153 break; 157 break;
154 case Opt_debug: 158 case Opt_debug:
159 if (args->ar_errors == GFS2_ERRORS_PANIC) {
160 fs_info(sdp, "-o debug and -o errors=panic "
161 "are mutually exclusive.\n");
162 return -EINVAL;
163 }
155 args->ar_debug = 1; 164 args->ar_debug = 1;
156 break; 165 break;
157 case Opt_nodebug: 166 case Opt_nodebug:
@@ -205,6 +214,17 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options)
205 return rv ? rv : -EINVAL; 214 return rv ? rv : -EINVAL;
206 } 215 }
207 break; 216 break;
217 case Opt_err_withdraw:
218 args->ar_errors = GFS2_ERRORS_WITHDRAW;
219 break;
220 case Opt_err_panic:
221 if (args->ar_debug) {
222 fs_info(sdp, "-o debug and -o errors=panic "
223 "are mutually exclusive.\n");
224 return -EINVAL;
225 }
226 args->ar_errors = GFS2_ERRORS_PANIC;
227 break;
208 case Opt_error: 228 case Opt_error:
209 default: 229 default:
210 fs_info(sdp, "invalid mount option: %s\n", o); 230 fs_info(sdp, "invalid mount option: %s\n", o);
@@ -768,7 +788,6 @@ restart:
768 /* Release stuff */ 788 /* Release stuff */
769 789
770 iput(sdp->sd_jindex); 790 iput(sdp->sd_jindex);
771 iput(sdp->sd_inum_inode);
772 iput(sdp->sd_statfs_inode); 791 iput(sdp->sd_statfs_inode);
773 iput(sdp->sd_rindex); 792 iput(sdp->sd_rindex);
774 iput(sdp->sd_quota_inode); 793 iput(sdp->sd_quota_inode);
@@ -779,10 +798,8 @@ restart:
779 if (!sdp->sd_args.ar_spectator) { 798 if (!sdp->sd_args.ar_spectator) {
780 gfs2_glock_dq_uninit(&sdp->sd_journal_gh); 799 gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
781 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); 800 gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
782 gfs2_glock_dq_uninit(&sdp->sd_ir_gh);
783 gfs2_glock_dq_uninit(&sdp->sd_sc_gh); 801 gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
784 gfs2_glock_dq_uninit(&sdp->sd_qc_gh); 802 gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
785 iput(sdp->sd_ir_inode);
786 iput(sdp->sd_sc_inode); 803 iput(sdp->sd_sc_inode);
787 iput(sdp->sd_qc_inode); 804 iput(sdp->sd_qc_inode);
788 } 805 }
@@ -1084,6 +1101,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
1084 gt->gt_log_flush_secs = args.ar_commit; 1101 gt->gt_log_flush_secs = args.ar_commit;
1085 spin_unlock(&gt->gt_spin); 1102 spin_unlock(&gt->gt_spin);
1086 1103
1104 gfs2_online_uevent(sdp);
1087 return 0; 1105 return 0;
1088} 1106}
1089 1107
@@ -1225,6 +1243,22 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1225 lfsecs = sdp->sd_tune.gt_log_flush_secs; 1243 lfsecs = sdp->sd_tune.gt_log_flush_secs;
1226 if (lfsecs != 60) 1244 if (lfsecs != 60)
1227 seq_printf(s, ",commit=%d", lfsecs); 1245 seq_printf(s, ",commit=%d", lfsecs);
1246 if (args->ar_errors != GFS2_ERRORS_DEFAULT) {
1247 const char *state;
1248
1249 switch (args->ar_errors) {
1250 case GFS2_ERRORS_WITHDRAW:
1251 state = "withdraw";
1252 break;
1253 case GFS2_ERRORS_PANIC:
1254 state = "panic";
1255 break;
1256 default:
1257 state = "unknown";
1258 break;
1259 }
1260 seq_printf(s, ",errors=%s", state);
1261 }
1228 return 0; 1262 return 0;
1229} 1263}
1230 1264
@@ -1252,6 +1286,10 @@ static void gfs2_delete_inode(struct inode *inode)
1252 goto out; 1286 goto out;
1253 } 1287 }
1254 1288
1289 error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
1290 if (error)
1291 goto out_truncate;
1292
1255 gfs2_glock_dq_wait(&ip->i_iopen_gh); 1293 gfs2_glock_dq_wait(&ip->i_iopen_gh);
1256 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); 1294 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
1257 error = gfs2_glock_nq(&ip->i_iopen_gh); 1295 error = gfs2_glock_nq(&ip->i_iopen_gh);
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index 22e0417ed996..235db3682885 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -25,7 +25,7 @@ static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
25 return x; 25 return x;
26} 26}
27 27
28void gfs2_jindex_free(struct gfs2_sbd *sdp); 28extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
29 29
30extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data); 30extern int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *data);
31 31
@@ -36,7 +36,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
36 struct gfs2_inode **ipp); 36 struct gfs2_inode **ipp);
37 37
38extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp); 38extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
39 39extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
40extern int gfs2_statfs_init(struct gfs2_sbd *sdp); 40extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
41extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free, 41extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
42 s64 dinodes); 42 s64 dinodes);
@@ -54,6 +54,7 @@ extern struct file_system_type gfs2meta_fs_type;
54extern const struct export_operations gfs2_export_ops; 54extern const struct export_operations gfs2_export_ops;
55extern const struct super_operations gfs2_super_ops; 55extern const struct super_operations gfs2_super_ops;
56extern const struct dentry_operations gfs2_dops; 56extern const struct dentry_operations gfs2_dops;
57extern struct xattr_handler *gfs2_xattr_handlers[];
57 58
58#endif /* __SUPER_DOT_H__ */ 59#endif /* __SUPER_DOT_H__ */
59 60
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index a7cbfbd340c7..446329728d52 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -16,6 +16,7 @@
16#include <linux/kobject.h> 16#include <linux/kobject.h>
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18#include <linux/gfs2_ondisk.h> 18#include <linux/gfs2_ondisk.h>
19#include <linux/genhd.h>
19 20
20#include "gfs2.h" 21#include "gfs2.h"
21#include "incore.h" 22#include "incore.h"
@@ -319,12 +320,6 @@ static ssize_t block_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
319 return ret; 320 return ret;
320} 321}
321 322
322static ssize_t lkid_show(struct gfs2_sbd *sdp, char *buf)
323{
324 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
325 return sprintf(buf, "%u\n", ls->ls_id);
326}
327
328static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf) 323static ssize_t lkfirst_show(struct gfs2_sbd *sdp, char *buf)
329{ 324{
330 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 325 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
@@ -389,7 +384,6 @@ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store)
389GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); 384GDLM_ATTR(proto_name, 0444, proto_name_show, NULL);
390GDLM_ATTR(block, 0644, block_show, block_store); 385GDLM_ATTR(block, 0644, block_show, block_store);
391GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); 386GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store);
392GDLM_ATTR(id, 0444, lkid_show, NULL);
393GDLM_ATTR(jid, 0444, jid_show, NULL); 387GDLM_ATTR(jid, 0444, jid_show, NULL);
394GDLM_ATTR(first, 0444, lkfirst_show, NULL); 388GDLM_ATTR(first, 0444, lkfirst_show, NULL);
395GDLM_ATTR(first_done, 0444, first_done_show, NULL); 389GDLM_ATTR(first_done, 0444, first_done_show, NULL);
@@ -401,7 +395,6 @@ static struct attribute *lock_module_attrs[] = {
401 &gdlm_attr_proto_name.attr, 395 &gdlm_attr_proto_name.attr,
402 &gdlm_attr_block.attr, 396 &gdlm_attr_block.attr,
403 &gdlm_attr_withdraw.attr, 397 &gdlm_attr_withdraw.attr,
404 &gdlm_attr_id.attr,
405 &gdlm_attr_jid.attr, 398 &gdlm_attr_jid.attr,
406 &gdlm_attr_first.attr, 399 &gdlm_attr_first.attr,
407 &gdlm_attr_first_done.attr, 400 &gdlm_attr_first_done.attr,
@@ -519,7 +512,14 @@ static struct attribute_group lock_module_group = {
519 512
520int gfs2_sys_fs_add(struct gfs2_sbd *sdp) 513int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
521{ 514{
515 struct super_block *sb = sdp->sd_vfs;
522 int error; 516 int error;
517 char ro[20];
518 char spectator[20];
519 char *envp[] = { ro, spectator, NULL };
520
521 sprintf(ro, "RDONLY=%d", (sb->s_flags & MS_RDONLY) ? 1 : 0);
522 sprintf(spectator, "SPECTATOR=%d", sdp->sd_args.ar_spectator ? 1 : 0);
523 523
524 sdp->sd_kobj.kset = gfs2_kset; 524 sdp->sd_kobj.kset = gfs2_kset;
525 error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL, 525 error = kobject_init_and_add(&sdp->sd_kobj, &gfs2_ktype, NULL,
@@ -535,9 +535,17 @@ int gfs2_sys_fs_add(struct gfs2_sbd *sdp)
535 if (error) 535 if (error)
536 goto fail_tune; 536 goto fail_tune;
537 537
538 kobject_uevent(&sdp->sd_kobj, KOBJ_ADD); 538 error = sysfs_create_link(&sdp->sd_kobj,
539 &disk_to_dev(sb->s_bdev->bd_disk)->kobj,
540 "device");
541 if (error)
542 goto fail_lock_module;
543
544 kobject_uevent_env(&sdp->sd_kobj, KOBJ_ADD, envp);
539 return 0; 545 return 0;
540 546
547fail_lock_module:
548 sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
541fail_tune: 549fail_tune:
542 sysfs_remove_group(&sdp->sd_kobj, &tune_group); 550 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
543fail_reg: 551fail_reg:
@@ -549,12 +557,12 @@ fail:
549 557
550void gfs2_sys_fs_del(struct gfs2_sbd *sdp) 558void gfs2_sys_fs_del(struct gfs2_sbd *sdp)
551{ 559{
560 sysfs_remove_link(&sdp->sd_kobj, "device");
552 sysfs_remove_group(&sdp->sd_kobj, &tune_group); 561 sysfs_remove_group(&sdp->sd_kobj, &tune_group);
553 sysfs_remove_group(&sdp->sd_kobj, &lock_module_group); 562 sysfs_remove_group(&sdp->sd_kobj, &lock_module_group);
554 kobject_put(&sdp->sd_kobj); 563 kobject_put(&sdp->sd_kobj);
555} 564}
556 565
557
558static int gfs2_uevent(struct kset *kset, struct kobject *kobj, 566static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
559 struct kobj_uevent_env *env) 567 struct kobj_uevent_env *env)
560{ 568{
@@ -563,6 +571,8 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj,
563 571
564 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); 572 add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name);
565 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); 573 add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name);
574 if (!sdp->sd_args.ar_spectator)
575 add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid);
566 if (gfs2_uuid_valid(uuid)) { 576 if (gfs2_uuid_valid(uuid)) {
567 add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-" 577 add_uevent_var(env, "UUID=%02X%02X%02X%02X-%02X%02X-%02X%02X-"
568 "%02X%02X-%02X%02X%02X%02X%02X%02X", 578 "%02X%02X-%02X%02X%02X%02X%02X%02X",
@@ -578,7 +588,6 @@ static struct kset_uevent_ops gfs2_uevent_ops = {
578 .uevent = gfs2_uevent, 588 .uevent = gfs2_uevent,
579}; 589};
580 590
581
582int gfs2_sys_init(void) 591int gfs2_sys_init(void)
583{ 592{
584 gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj); 593 gfs2_kset = kset_create_and_add("gfs2", &gfs2_uevent_ops, fs_kobj);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 9d12b1118ba0..f6a7efa34eb9 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -38,24 +38,30 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
38 const struct lm_lockops *lm = ls->ls_ops; 38 const struct lm_lockops *lm = ls->ls_ops;
39 va_list args; 39 va_list args;
40 40
41 if (test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) 41 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
42 test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags))
42 return 0; 43 return 0;
43 44
44 va_start(args, fmt); 45 va_start(args, fmt);
45 vprintk(fmt, args); 46 vprintk(fmt, args);
46 va_end(args); 47 va_end(args);
47 48
48 fs_err(sdp, "about to withdraw this file system\n"); 49 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
49 BUG_ON(sdp->sd_args.ar_debug); 50 fs_err(sdp, "about to withdraw this file system\n");
51 BUG_ON(sdp->sd_args.ar_debug);
50 52
51 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); 53 kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
52 54
53 if (lm->lm_unmount) { 55 if (lm->lm_unmount) {
54 fs_err(sdp, "telling LM to unmount\n"); 56 fs_err(sdp, "telling LM to unmount\n");
55 lm->lm_unmount(sdp); 57 lm->lm_unmount(sdp);
58 }
59 fs_err(sdp, "withdrawn\n");
60 dump_stack();
56 } 61 }
57 fs_err(sdp, "withdrawn\n"); 62
58 dump_stack(); 63 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
64 panic("GFS2: fsid=%s: panic requested.\n", sdp->sd_fsname);
59 65
60 return -1; 66 return -1;
61} 67}
@@ -93,17 +99,24 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
93 gfs2_tune_get(sdp, gt_complain_secs) * HZ)) 99 gfs2_tune_get(sdp, gt_complain_secs) * HZ))
94 return -2; 100 return -2;
95 101
96 printk(KERN_WARNING 102 if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
97 "GFS2: fsid=%s: warning: assertion \"%s\" failed\n" 103 printk(KERN_WARNING
98 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", 104 "GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
99 sdp->sd_fsname, assertion, 105 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
100 sdp->sd_fsname, function, file, line); 106 sdp->sd_fsname, assertion,
107 sdp->sd_fsname, function, file, line);
101 108
102 if (sdp->sd_args.ar_debug) 109 if (sdp->sd_args.ar_debug)
103 BUG(); 110 BUG();
104 else 111 else
105 dump_stack(); 112 dump_stack();
106 113
114 if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
115 panic("GFS2: fsid=%s: warning: assertion \"%s\" failed\n"
116 "GFS2: fsid=%s: function = %s, file = %s, line = %u\n",
117 sdp->sd_fsname, assertion,
118 sdp->sd_fsname, function, file, line);
119
107 sdp->sd_last_warning = jiffies; 120 sdp->sd_last_warning = jiffies;
108 121
109 return -1; 122 return -1;
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/xattr.c
index 07ea9529adda..8a0f8ef6ee27 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/xattr.c
@@ -18,8 +18,7 @@
18#include "gfs2.h" 18#include "gfs2.h"
19#include "incore.h" 19#include "incore.h"
20#include "acl.h" 20#include "acl.h"
21#include "eaops.h" 21#include "xattr.h"
22#include "eattr.h"
23#include "glock.h" 22#include "glock.h"
24#include "inode.h" 23#include "inode.h"
25#include "meta_io.h" 24#include "meta_io.h"
@@ -38,26 +37,32 @@
38 * Returns: 1 if the EA should be stuffed 37 * Returns: 1 if the EA should be stuffed
39 */ 38 */
40 39
41static int ea_calc_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er, 40static int ea_calc_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize,
42 unsigned int *size) 41 unsigned int *size)
43{ 42{
44 *size = GFS2_EAREQ_SIZE_STUFFED(er); 43 unsigned int jbsize = sdp->sd_jbsize;
45 if (*size <= sdp->sd_jbsize) 44
45 /* Stuffed */
46 *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize + dsize, 8);
47
48 if (*size <= jbsize)
46 return 1; 49 return 1;
47 50
48 *size = GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er); 51 /* Unstuffed */
52 *size = ALIGN(sizeof(struct gfs2_ea_header) + nsize +
53 (sizeof(__be64) * DIV_ROUND_UP(dsize, jbsize)), 8);
49 54
50 return 0; 55 return 0;
51} 56}
52 57
53static int ea_check_size(struct gfs2_sbd *sdp, struct gfs2_ea_request *er) 58static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize)
54{ 59{
55 unsigned int size; 60 unsigned int size;
56 61
57 if (er->er_data_len > GFS2_EA_MAX_DATA_LEN) 62 if (dsize > GFS2_EA_MAX_DATA_LEN)
58 return -ERANGE; 63 return -ERANGE;
59 64
60 ea_calc_size(sdp, er, &size); 65 ea_calc_size(sdp, nsize, dsize, &size);
61 66
62 /* This can only happen with 512 byte blocks */ 67 /* This can only happen with 512 byte blocks */
63 if (size > sdp->sd_jbsize) 68 if (size > sdp->sd_jbsize)
@@ -151,7 +156,9 @@ out:
151} 156}
152 157
153struct ea_find { 158struct ea_find {
154 struct gfs2_ea_request *ef_er; 159 int type;
160 const char *name;
161 size_t namel;
155 struct gfs2_ea_location *ef_el; 162 struct gfs2_ea_location *ef_el;
156}; 163};
157 164
@@ -160,14 +167,13 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
160 void *private) 167 void *private)
161{ 168{
162 struct ea_find *ef = private; 169 struct ea_find *ef = private;
163 struct gfs2_ea_request *er = ef->ef_er;
164 170
165 if (ea->ea_type == GFS2_EATYPE_UNUSED) 171 if (ea->ea_type == GFS2_EATYPE_UNUSED)
166 return 0; 172 return 0;
167 173
168 if (ea->ea_type == er->er_type) { 174 if (ea->ea_type == ef->type) {
169 if (ea->ea_name_len == er->er_name_len && 175 if (ea->ea_name_len == ef->namel &&
170 !memcmp(GFS2_EA2NAME(ea), er->er_name, ea->ea_name_len)) { 176 !memcmp(GFS2_EA2NAME(ea), ef->name, ea->ea_name_len)) {
171 struct gfs2_ea_location *el = ef->ef_el; 177 struct gfs2_ea_location *el = ef->ef_el;
172 get_bh(bh); 178 get_bh(bh);
173 el->el_bh = bh; 179 el->el_bh = bh;
@@ -180,13 +186,15 @@ static int ea_find_i(struct gfs2_inode *ip, struct buffer_head *bh,
180 return 0; 186 return 0;
181} 187}
182 188
183int gfs2_ea_find(struct gfs2_inode *ip, struct gfs2_ea_request *er, 189int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name,
184 struct gfs2_ea_location *el) 190 struct gfs2_ea_location *el)
185{ 191{
186 struct ea_find ef; 192 struct ea_find ef;
187 int error; 193 int error;
188 194
189 ef.ef_er = er; 195 ef.type = type;
196 ef.name = name;
197 ef.namel = strlen(name);
190 ef.ef_el = el; 198 ef.ef_el = el;
191 199
192 memset(el, 0, sizeof(struct gfs2_ea_location)); 200 memset(el, 0, sizeof(struct gfs2_ea_location));
@@ -344,6 +352,20 @@ struct ea_list {
344 unsigned int ei_size; 352 unsigned int ei_size;
345}; 353};
346 354
355static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
356{
357 switch (ea->ea_type) {
358 case GFS2_EATYPE_USR:
359 return 5 + ea->ea_name_len + 1;
360 case GFS2_EATYPE_SYS:
361 return 7 + ea->ea_name_len + 1;
362 case GFS2_EATYPE_SECURITY:
363 return 9 + ea->ea_name_len + 1;
364 default:
365 return 0;
366 }
367}
368
347static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh, 369static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
348 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev, 370 struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
349 void *private) 371 void *private)
@@ -392,21 +414,25 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
392} 414}
393 415
394/** 416/**
395 * gfs2_ea_list - 417 * gfs2_listxattr - List gfs2 extended attributes
396 * @ip: 418 * @dentry: The dentry whose inode we are interested in
397 * @er: 419 * @buffer: The buffer to write the results
420 * @size: The size of the buffer
398 * 421 *
399 * Returns: actual size of data on success, -errno on error 422 * Returns: actual size of data on success, -errno on error
400 */ 423 */
401 424
402int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er) 425ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
403{ 426{
427 struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
428 struct gfs2_ea_request er;
404 struct gfs2_holder i_gh; 429 struct gfs2_holder i_gh;
405 int error; 430 int error;
406 431
407 if (!er->er_data || !er->er_data_len) { 432 memset(&er, 0, sizeof(struct gfs2_ea_request));
408 er->er_data = NULL; 433 if (size) {
409 er->er_data_len = 0; 434 er.er_data = buffer;
435 er.er_data_len = size;
410 } 436 }
411 437
412 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 438 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
@@ -414,7 +440,7 @@ int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er)
414 return error; 440 return error;
415 441
416 if (ip->i_eattr) { 442 if (ip->i_eattr) {
417 struct ea_list ei = { .ei_er = er, .ei_size = 0 }; 443 struct ea_list ei = { .ei_er = &er, .ei_size = 0 };
418 444
419 error = ea_foreach(ip, ea_list_i, &ei); 445 error = ea_foreach(ip, ea_list_i, &ei);
420 if (!error) 446 if (!error)
@@ -491,84 +517,61 @@ out:
491} 517}
492 518
493int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el, 519int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
494 char *data) 520 char *data, size_t size)
495{ 521{
522 int ret;
523 size_t len = GFS2_EA_DATA_LEN(el->el_ea);
524 if (len > size)
525 return -ERANGE;
526
496 if (GFS2_EA_IS_STUFFED(el->el_ea)) { 527 if (GFS2_EA_IS_STUFFED(el->el_ea)) {
497 memcpy(data, GFS2_EA2DATA(el->el_ea), GFS2_EA_DATA_LEN(el->el_ea)); 528 memcpy(data, GFS2_EA2DATA(el->el_ea), len);
498 return 0; 529 return len;
499 } else 530 }
500 return ea_get_unstuffed(ip, el->el_ea, data); 531 ret = ea_get_unstuffed(ip, el->el_ea, data);
532 if (ret < 0)
533 return ret;
534 return len;
501} 535}
502 536
503/** 537/**
504 * gfs2_ea_get_i - 538 * gfs2_xattr_get - Get a GFS2 extended attribute
505 * @ip: The GFS2 inode 539 * @inode: The inode
506 * @er: The request structure 540 * @type: The type of extended attribute
541 * @name: The name of the extended attribute
542 * @buffer: The buffer to write the result into
543 * @size: The size of the buffer
507 * 544 *
508 * Returns: actual size of data on success, -errno on error 545 * Returns: actual size of data on success, -errno on error
509 */ 546 */
510 547
511int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) 548int gfs2_xattr_get(struct inode *inode, int type, const char *name,
549 void *buffer, size_t size)
512{ 550{
551 struct gfs2_inode *ip = GFS2_I(inode);
513 struct gfs2_ea_location el; 552 struct gfs2_ea_location el;
514 int error; 553 int error;
515 554
516 if (!ip->i_eattr) 555 if (!ip->i_eattr)
517 return -ENODATA; 556 return -ENODATA;
557 if (strlen(name) > GFS2_EA_MAX_NAME_LEN)
558 return -EINVAL;
518 559
519 error = gfs2_ea_find(ip, er, &el); 560 error = gfs2_ea_find(ip, type, name, &el);
520 if (error) 561 if (error)
521 return error; 562 return error;
522 if (!el.el_ea) 563 if (!el.el_ea)
523 return -ENODATA; 564 return -ENODATA;
524 565 if (size)
525 if (er->er_data_len) { 566 error = gfs2_ea_get_copy(ip, &el, buffer, size);
526 if (GFS2_EA_DATA_LEN(el.el_ea) > er->er_data_len) 567 else
527 error = -ERANGE;
528 else
529 error = gfs2_ea_get_copy(ip, &el, er->er_data);
530 }
531 if (!error)
532 error = GFS2_EA_DATA_LEN(el.el_ea); 568 error = GFS2_EA_DATA_LEN(el.el_ea);
533
534 brelse(el.el_bh); 569 brelse(el.el_bh);
535 570
536 return error; 571 return error;
537} 572}
538 573
539/** 574/**
540 * gfs2_ea_get -
541 * @ip: The GFS2 inode
542 * @er: The request structure
543 *
544 * Returns: actual size of data on success, -errno on error
545 */
546
547int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er)
548{
549 struct gfs2_holder i_gh;
550 int error;
551
552 if (!er->er_name_len ||
553 er->er_name_len > GFS2_EA_MAX_NAME_LEN)
554 return -EINVAL;
555 if (!er->er_data || !er->er_data_len) {
556 er->er_data = NULL;
557 er->er_data_len = 0;
558 }
559
560 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
561 if (error)
562 return error;
563
564 error = gfs2_ea_ops[er->er_type]->eo_get(ip, er);
565
566 gfs2_glock_dq_uninit(&i_gh);
567
568 return error;
569}
570
571/**
572 * ea_alloc_blk - allocates a new block for extended attributes. 575 * ea_alloc_blk - allocates a new block for extended attributes.
573 * @ip: A pointer to the inode that's getting extended attributes 576 * @ip: A pointer to the inode that's getting extended attributes
574 * @bhp: Pointer to pointer to a struct buffer_head 577 * @bhp: Pointer to pointer to a struct buffer_head
@@ -713,12 +716,6 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
713 716
714 error = gfs2_meta_inode_buffer(ip, &dibh); 717 error = gfs2_meta_inode_buffer(ip, &dibh);
715 if (!error) { 718 if (!error) {
716 if (er->er_flags & GFS2_ERF_MODE) {
717 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
718 (ip->i_inode.i_mode & S_IFMT) ==
719 (er->er_mode & S_IFMT));
720 ip->i_inode.i_mode = er->er_mode;
721 }
722 ip->i_inode.i_ctime = CURRENT_TIME; 719 ip->i_inode.i_ctime = CURRENT_TIME;
723 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 720 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
724 gfs2_dinode_out(ip, dibh->b_data); 721 gfs2_dinode_out(ip, dibh->b_data);
@@ -762,15 +759,23 @@ static int ea_init_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
762 * Returns: errno 759 * Returns: errno
763 */ 760 */
764 761
765static int ea_init(struct gfs2_inode *ip, struct gfs2_ea_request *er) 762static int ea_init(struct gfs2_inode *ip, int type, const char *name,
763 const void *data, size_t size)
766{ 764{
765 struct gfs2_ea_request er;
767 unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize; 766 unsigned int jbsize = GFS2_SB(&ip->i_inode)->sd_jbsize;
768 unsigned int blks = 1; 767 unsigned int blks = 1;
769 768
770 if (GFS2_EAREQ_SIZE_STUFFED(er) > jbsize) 769 er.er_type = type;
771 blks += DIV_ROUND_UP(er->er_data_len, jbsize); 770 er.er_name = name;
771 er.er_name_len = strlen(name);
772 er.er_data = (void *)data;
773 er.er_data_len = size;
774
775 if (GFS2_EAREQ_SIZE_STUFFED(&er) > jbsize)
776 blks += DIV_ROUND_UP(er.er_data_len, jbsize);
772 777
773 return ea_alloc_skeleton(ip, er, blks, ea_init_i, NULL); 778 return ea_alloc_skeleton(ip, &er, blks, ea_init_i, NULL);
774} 779}
775 780
776static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea) 781static struct gfs2_ea_header *ea_split_ea(struct gfs2_ea_header *ea)
@@ -848,12 +853,6 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
848 error = gfs2_meta_inode_buffer(ip, &dibh); 853 error = gfs2_meta_inode_buffer(ip, &dibh);
849 if (error) 854 if (error)
850 goto out; 855 goto out;
851
852 if (er->er_flags & GFS2_ERF_MODE) {
853 gfs2_assert_withdraw(GFS2_SB(&ip->i_inode),
854 (ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
855 ip->i_inode.i_mode = er->er_mode;
856 }
857 ip->i_inode.i_ctime = CURRENT_TIME; 856 ip->i_inode.i_ctime = CURRENT_TIME;
858 gfs2_trans_add_bh(ip->i_gl, dibh, 1); 857 gfs2_trans_add_bh(ip->i_gl, dibh, 1);
859 gfs2_dinode_out(ip, dibh->b_data); 858 gfs2_dinode_out(ip, dibh->b_data);
@@ -894,7 +893,8 @@ static int ea_set_simple(struct gfs2_inode *ip, struct buffer_head *bh,
894 int stuffed; 893 int stuffed;
895 int error; 894 int error;
896 895
897 stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er, &size); 896 stuffed = ea_calc_size(GFS2_SB(&ip->i_inode), es->es_er->er_name_len,
897 es->es_er->er_data_len, &size);
898 898
899 if (ea->ea_type == GFS2_EATYPE_UNUSED) { 899 if (ea->ea_type == GFS2_EATYPE_UNUSED) {
900 if (GFS2_EA_REC_LEN(ea) < size) 900 if (GFS2_EA_REC_LEN(ea) < size)
@@ -1005,15 +1005,22 @@ out:
1005 return error; 1005 return error;
1006} 1006}
1007 1007
1008static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er, 1008static int ea_set_i(struct gfs2_inode *ip, int type, const char *name,
1009 struct gfs2_ea_location *el) 1009 const void *value, size_t size, struct gfs2_ea_location *el)
1010{ 1010{
1011 struct gfs2_ea_request er;
1011 struct ea_set es; 1012 struct ea_set es;
1012 unsigned int blks = 2; 1013 unsigned int blks = 2;
1013 int error; 1014 int error;
1014 1015
1016 er.er_type = type;
1017 er.er_name = name;
1018 er.er_data = (void *)value;
1019 er.er_name_len = strlen(name);
1020 er.er_data_len = size;
1021
1015 memset(&es, 0, sizeof(struct ea_set)); 1022 memset(&es, 0, sizeof(struct ea_set));
1016 es.es_er = er; 1023 es.es_er = &er;
1017 es.es_el = el; 1024 es.es_el = el;
1018 1025
1019 error = ea_foreach(ip, ea_set_simple, &es); 1026 error = ea_foreach(ip, ea_set_simple, &es);
@@ -1024,10 +1031,10 @@ static int ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er,
1024 1031
1025 if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT)) 1032 if (!(ip->i_diskflags & GFS2_DIF_EA_INDIRECT))
1026 blks++; 1033 blks++;
1027 if (GFS2_EAREQ_SIZE_STUFFED(er) > GFS2_SB(&ip->i_inode)->sd_jbsize) 1034 if (GFS2_EAREQ_SIZE_STUFFED(&er) > GFS2_SB(&ip->i_inode)->sd_jbsize)
1028 blks += DIV_ROUND_UP(er->er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize); 1035 blks += DIV_ROUND_UP(er.er_data_len, GFS2_SB(&ip->i_inode)->sd_jbsize);
1029 1036
1030 return ea_alloc_skeleton(ip, er, blks, ea_set_block, el); 1037 return ea_alloc_skeleton(ip, &er, blks, ea_set_block, el);
1031} 1038}
1032 1039
1033static int ea_set_remove_unstuffed(struct gfs2_inode *ip, 1040static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
@@ -1039,75 +1046,7 @@ static int ea_set_remove_unstuffed(struct gfs2_inode *ip,
1039 GFS2_EA2NEXT(el->el_prev) == el->el_ea); 1046 GFS2_EA2NEXT(el->el_prev) == el->el_ea);
1040 } 1047 }
1041 1048
1042 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev,0); 1049 return ea_remove_unstuffed(ip, el->el_bh, el->el_ea, el->el_prev, 0);
1043}
1044
1045int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1046{
1047 struct gfs2_ea_location el;
1048 int error;
1049
1050 if (!ip->i_eattr) {
1051 if (er->er_flags & XATTR_REPLACE)
1052 return -ENODATA;
1053 return ea_init(ip, er);
1054 }
1055
1056 error = gfs2_ea_find(ip, er, &el);
1057 if (error)
1058 return error;
1059
1060 if (el.el_ea) {
1061 if (ip->i_diskflags & GFS2_DIF_APPENDONLY) {
1062 brelse(el.el_bh);
1063 return -EPERM;
1064 }
1065
1066 error = -EEXIST;
1067 if (!(er->er_flags & XATTR_CREATE)) {
1068 int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
1069 error = ea_set_i(ip, er, &el);
1070 if (!error && unstuffed)
1071 ea_set_remove_unstuffed(ip, &el);
1072 }
1073
1074 brelse(el.el_bh);
1075 } else {
1076 error = -ENODATA;
1077 if (!(er->er_flags & XATTR_REPLACE))
1078 error = ea_set_i(ip, er, NULL);
1079 }
1080
1081 return error;
1082}
1083
1084int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1085{
1086 struct gfs2_holder i_gh;
1087 int error;
1088
1089 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN)
1090 return -EINVAL;
1091 if (!er->er_data || !er->er_data_len) {
1092 er->er_data = NULL;
1093 er->er_data_len = 0;
1094 }
1095 error = ea_check_size(GFS2_SB(&ip->i_inode), er);
1096 if (error)
1097 return error;
1098
1099 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
1100 if (error)
1101 return error;
1102
1103 if (IS_IMMUTABLE(&ip->i_inode))
1104 error = -EPERM;
1105 else
1106 error = gfs2_ea_ops[er->er_type]->eo_set(ip, er);
1107
1108 gfs2_glock_dq_uninit(&i_gh);
1109
1110 return error;
1111} 1050}
1112 1051
1113static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el) 1052static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
@@ -1131,8 +1070,9 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1131 1070
1132 if (GFS2_EA_IS_LAST(ea)) 1071 if (GFS2_EA_IS_LAST(ea))
1133 prev->ea_flags |= GFS2_EAFLAG_LAST; 1072 prev->ea_flags |= GFS2_EAFLAG_LAST;
1134 } else 1073 } else {
1135 ea->ea_type = GFS2_EATYPE_UNUSED; 1074 ea->ea_type = GFS2_EATYPE_UNUSED;
1075 }
1136 1076
1137 error = gfs2_meta_inode_buffer(ip, &dibh); 1077 error = gfs2_meta_inode_buffer(ip, &dibh);
1138 if (!error) { 1078 if (!error) {
@@ -1147,15 +1087,29 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
1147 return error; 1087 return error;
1148} 1088}
1149 1089
1150int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er) 1090/**
1091 * gfs2_xattr_remove - Remove a GFS2 extended attribute
1092 * @inode: The inode
1093 * @type: The type of the extended attribute
1094 * @name: The name of the extended attribute
1095 *
1096 * This is not called directly by the VFS since we use the (common)
1097 * scheme of making a "set with NULL data" mean a remove request. Note
1098 * that this is different from a set with zero length data.
1099 *
1100 * Returns: 0, or errno on failure
1101 */
1102
1103static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
1151{ 1104{
1105 struct gfs2_inode *ip = GFS2_I(inode);
1152 struct gfs2_ea_location el; 1106 struct gfs2_ea_location el;
1153 int error; 1107 int error;
1154 1108
1155 if (!ip->i_eattr) 1109 if (!ip->i_eattr)
1156 return -ENODATA; 1110 return -ENODATA;
1157 1111
1158 error = gfs2_ea_find(ip, er, &el); 1112 error = gfs2_ea_find(ip, type, name, &el);
1159 if (error) 1113 if (error)
1160 return error; 1114 return error;
1161 if (!el.el_ea) 1115 if (!el.el_ea)
@@ -1164,8 +1118,7 @@ int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1164 if (GFS2_EA_IS_STUFFED(el.el_ea)) 1118 if (GFS2_EA_IS_STUFFED(el.el_ea))
1165 error = ea_remove_stuffed(ip, &el); 1119 error = ea_remove_stuffed(ip, &el);
1166 else 1120 else
1167 error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, 1121 error = ea_remove_unstuffed(ip, el.el_bh, el.el_ea, el.el_prev, 0);
1168 0);
1169 1122
1170 brelse(el.el_bh); 1123 brelse(el.el_bh);
1171 1124
@@ -1173,31 +1126,70 @@ int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er)
1173} 1126}
1174 1127
1175/** 1128/**
1176 * gfs2_ea_remove - sets (or creates or replaces) an extended attribute 1129 * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
1177 * @ip: pointer to the inode of the target file 1130 * @inode: The inode
1178 * @er: request information 1131 * @type: The type of the extended attribute
1132 * @name: The name of the extended attribute
1133 * @value: The value of the extended attribute (NULL for remove)
1134 * @size: The size of the @value argument
1135 * @flags: Create or Replace
1179 * 1136 *
1180 * Returns: errno 1137 * See gfs2_xattr_remove() for details of the removal of xattrs.
1138 *
1139 * Returns: 0 or errno on failure
1181 */ 1140 */
1182 1141
1183int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) 1142int gfs2_xattr_set(struct inode *inode, int type, const char *name,
1143 const void *value, size_t size, int flags)
1184{ 1144{
1185 struct gfs2_holder i_gh; 1145 struct gfs2_sbd *sdp = GFS2_SB(inode);
1146 struct gfs2_inode *ip = GFS2_I(inode);
1147 struct gfs2_ea_location el;
1148 unsigned int namel = strlen(name);
1186 int error; 1149 int error;
1187 1150
1188 if (!er->er_name_len || er->er_name_len > GFS2_EA_MAX_NAME_LEN) 1151 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
1189 return -EINVAL; 1152 return -EPERM;
1153 if (namel > GFS2_EA_MAX_NAME_LEN)
1154 return -ERANGE;
1190 1155
1191 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); 1156 if (value == NULL)
1157 return gfs2_xattr_remove(inode, type, name);
1158
1159 if (ea_check_size(sdp, namel, size))
1160 return -ERANGE;
1161
1162 if (!ip->i_eattr) {
1163 if (flags & XATTR_REPLACE)
1164 return -ENODATA;
1165 return ea_init(ip, type, name, value, size);
1166 }
1167
1168 error = gfs2_ea_find(ip, type, name, &el);
1192 if (error) 1169 if (error)
1193 return error; 1170 return error;
1194 1171
1195 if (IS_IMMUTABLE(&ip->i_inode) || IS_APPEND(&ip->i_inode)) 1172 if (el.el_ea) {
1196 error = -EPERM; 1173 if (ip->i_diskflags & GFS2_DIF_APPENDONLY) {
1197 else 1174 brelse(el.el_bh);
1198 error = gfs2_ea_ops[er->er_type]->eo_remove(ip, er); 1175 return -EPERM;
1176 }
1199 1177
1200 gfs2_glock_dq_uninit(&i_gh); 1178 error = -EEXIST;
1179 if (!(flags & XATTR_CREATE)) {
1180 int unstuffed = !GFS2_EA_IS_STUFFED(el.el_ea);
1181 error = ea_set_i(ip, type, name, value, size, &el);
1182 if (!error && unstuffed)
1183 ea_set_remove_unstuffed(ip, &el);
1184 }
1185
1186 brelse(el.el_bh);
1187 return error;
1188 }
1189
1190 error = -ENODATA;
1191 if (!(flags & XATTR_REPLACE))
1192 error = ea_set_i(ip, type, name, value, size, NULL);
1201 1193
1202 return error; 1194 return error;
1203} 1195}
@@ -1503,3 +1495,64 @@ out_alloc:
1503 return error; 1495 return error;
1504} 1496}
1505 1497
1498static int gfs2_xattr_user_get(struct inode *inode, const char *name,
1499 void *buffer, size_t size)
1500{
1501 return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size);
1502}
1503
1504static int gfs2_xattr_user_set(struct inode *inode, const char *name,
1505 const void *value, size_t size, int flags)
1506{
1507 return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
1508}
1509
1510static int gfs2_xattr_system_get(struct inode *inode, const char *name,
1511 void *buffer, size_t size)
1512{
1513 return gfs2_xattr_get(inode, GFS2_EATYPE_SYS, name, buffer, size);
1514}
1515
1516static int gfs2_xattr_system_set(struct inode *inode, const char *name,
1517 const void *value, size_t size, int flags)
1518{
1519 return gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, flags);
1520}
1521
1522static int gfs2_xattr_security_get(struct inode *inode, const char *name,
1523 void *buffer, size_t size)
1524{
1525 return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size);
1526}
1527
1528static int gfs2_xattr_security_set(struct inode *inode, const char *name,
1529 const void *value, size_t size, int flags)
1530{
1531 return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags);
1532}
1533
1534static struct xattr_handler gfs2_xattr_user_handler = {
1535 .prefix = XATTR_USER_PREFIX,
1536 .get = gfs2_xattr_user_get,
1537 .set = gfs2_xattr_user_set,
1538};
1539
1540static struct xattr_handler gfs2_xattr_security_handler = {
1541 .prefix = XATTR_SECURITY_PREFIX,
1542 .get = gfs2_xattr_security_get,
1543 .set = gfs2_xattr_security_set,
1544};
1545
1546static struct xattr_handler gfs2_xattr_system_handler = {
1547 .prefix = XATTR_SYSTEM_PREFIX,
1548 .get = gfs2_xattr_system_get,
1549 .set = gfs2_xattr_system_set,
1550};
1551
1552struct xattr_handler *gfs2_xattr_handlers[] = {
1553 &gfs2_xattr_user_handler,
1554 &gfs2_xattr_security_handler,
1555 &gfs2_xattr_system_handler,
1556 NULL,
1557};
1558
diff --git a/fs/gfs2/eattr.h b/fs/gfs2/xattr.h
index c82dbe01d713..cbdfd7743733 100644
--- a/fs/gfs2/eattr.h
+++ b/fs/gfs2/xattr.h
@@ -19,7 +19,7 @@ struct iattr;
19#define GFS2_EA_SIZE(ea) \ 19#define GFS2_EA_SIZE(ea) \
20ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \ 20ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
21 ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \ 21 ((GFS2_EA_IS_STUFFED(ea)) ? GFS2_EA_DATA_LEN(ea) : \
22 (sizeof(__be64) * (ea)->ea_num_ptrs)), 8) 22 (sizeof(__be64) * (ea)->ea_num_ptrs)), 8)
23 23
24#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs) 24#define GFS2_EA_IS_STUFFED(ea) (!(ea)->ea_num_ptrs)
25#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST) 25#define GFS2_EA_IS_LAST(ea) ((ea)->ea_flags & GFS2_EAFLAG_LAST)
@@ -27,10 +27,6 @@ ALIGN(sizeof(struct gfs2_ea_header) + (ea)->ea_name_len + \
27#define GFS2_EAREQ_SIZE_STUFFED(er) \ 27#define GFS2_EAREQ_SIZE_STUFFED(er) \
28ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8) 28ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + (er)->er_data_len, 8)
29 29
30#define GFS2_EAREQ_SIZE_UNSTUFFED(sdp, er) \
31ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
32 sizeof(__be64) * DIV_ROUND_UP((er)->er_data_len, (sdp)->sd_jbsize), 8)
33
34#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1)) 30#define GFS2_EA2NAME(ea) ((char *)((struct gfs2_ea_header *)(ea) + 1))
35#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len) 31#define GFS2_EA2DATA(ea) (GFS2_EA2NAME(ea) + (ea)->ea_name_len)
36 32
@@ -43,16 +39,12 @@ ALIGN(sizeof(struct gfs2_ea_header) + (er)->er_name_len + \
43#define GFS2_EA_BH2FIRST(bh) \ 39#define GFS2_EA_BH2FIRST(bh) \
44((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header))) 40((struct gfs2_ea_header *)((bh)->b_data + sizeof(struct gfs2_meta_header)))
45 41
46#define GFS2_ERF_MODE 0x80000000
47
48struct gfs2_ea_request { 42struct gfs2_ea_request {
49 const char *er_name; 43 const char *er_name;
50 char *er_data; 44 char *er_data;
51 unsigned int er_name_len; 45 unsigned int er_name_len;
52 unsigned int er_data_len; 46 unsigned int er_data_len;
53 unsigned int er_type; /* GFS2_EATYPE_... */ 47 unsigned int er_type; /* GFS2_EATYPE_... */
54 int er_flags;
55 mode_t er_mode;
56}; 48};
57 49
58struct gfs2_ea_location { 50struct gfs2_ea_location {
@@ -61,40 +53,20 @@ struct gfs2_ea_location {
61 struct gfs2_ea_header *el_prev; 53 struct gfs2_ea_header *el_prev;
62}; 54};
63 55
64int gfs2_ea_get_i(struct gfs2_inode *ip, struct gfs2_ea_request *er); 56extern int gfs2_xattr_get(struct inode *inode, int type, const char *name,
65int gfs2_ea_set_i(struct gfs2_inode *ip, struct gfs2_ea_request *er); 57 void *buffer, size_t size);
66int gfs2_ea_remove_i(struct gfs2_inode *ip, struct gfs2_ea_request *er); 58extern int gfs2_xattr_set(struct inode *inode, int type, const char *name,
67 59 const void *value, size_t size, int flags);
68int gfs2_ea_list(struct gfs2_inode *ip, struct gfs2_ea_request *er); 60extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
69int gfs2_ea_get(struct gfs2_inode *ip, struct gfs2_ea_request *er); 61extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
70int gfs2_ea_set(struct gfs2_inode *ip, struct gfs2_ea_request *er);
71int gfs2_ea_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er);
72
73int gfs2_ea_dealloc(struct gfs2_inode *ip);
74 62
75/* Exported to acl.c */ 63/* Exported to acl.c */
76 64
77int gfs2_ea_find(struct gfs2_inode *ip, 65extern int gfs2_ea_find(struct gfs2_inode *ip, int type, const char *name,
78 struct gfs2_ea_request *er, 66 struct gfs2_ea_location *el);
79 struct gfs2_ea_location *el); 67extern int gfs2_ea_get_copy(struct gfs2_inode *ip, struct gfs2_ea_location *el,
80int gfs2_ea_get_copy(struct gfs2_inode *ip, 68 char *data, size_t size);
81 struct gfs2_ea_location *el, 69extern int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
82 char *data); 70 struct iattr *attr, char *data);
83int gfs2_ea_acl_chmod(struct gfs2_inode *ip, struct gfs2_ea_location *el,
84 struct iattr *attr, char *data);
85
86static inline unsigned int gfs2_ea_strlen(struct gfs2_ea_header *ea)
87{
88 switch (ea->ea_type) {
89 case GFS2_EATYPE_USR:
90 return 5 + ea->ea_name_len + 1;
91 case GFS2_EATYPE_SYS:
92 return 7 + ea->ea_name_len + 1;
93 case GFS2_EATYPE_SECURITY:
94 return 9 + ea->ea_name_len + 1;
95 default:
96 return 0;
97 }
98}
99 71
100#endif /* __EATTR_DOT_H__ */ 72#endif /* __EATTR_DOT_H__ */
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cb88dac8ccaa..a93b885311d8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -44,6 +44,7 @@ static const struct inode_operations hugetlbfs_dir_inode_operations;
44static const struct inode_operations hugetlbfs_inode_operations; 44static const struct inode_operations hugetlbfs_inode_operations;
45 45
46static struct backing_dev_info hugetlbfs_backing_dev_info = { 46static struct backing_dev_info hugetlbfs_backing_dev_info = {
47 .name = "hugetlbfs",
47 .ra_pages = 0, /* No readahead */ 48 .ra_pages = 0, /* No readahead */
48 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 49 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
49}; 50};
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 8fcb6239218e..7edb62e97419 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -258,7 +258,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
258 return rc; 258 return rc;
259} 259}
260 260
261static int jffs2_check_acl(struct inode *inode, int mask) 261int jffs2_check_acl(struct inode *inode, int mask)
262{ 262{
263 struct posix_acl *acl; 263 struct posix_acl *acl;
264 int rc; 264 int rc;
@@ -274,11 +274,6 @@ static int jffs2_check_acl(struct inode *inode, int mask)
274 return -EAGAIN; 274 return -EAGAIN;
275} 275}
276 276
277int jffs2_permission(struct inode *inode, int mask)
278{
279 return generic_permission(inode, mask, jffs2_check_acl);
280}
281
282int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) 277int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
283{ 278{
284 struct posix_acl *acl, *clone; 279 struct posix_acl *acl, *clone;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index fc929f2a14f6..f0ba63e3c36b 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29extern int jffs2_permission(struct inode *, int); 29extern int jffs2_check_acl(struct inode *, int);
30extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
32extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
@@ -36,7 +36,7 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
36 36
37#else 37#else
38 38
39#define jffs2_permission (NULL) 39#define jffs2_check_acl (NULL)
40#define jffs2_acl_chmod(inode) (0) 40#define jffs2_acl_chmod(inode) (0)
41#define jffs2_init_acl_pre(dir_i,inode,mode) (0) 41#define jffs2_init_acl_pre(dir_i,inode,mode) (0)
42#define jffs2_init_acl_post(inode) (0) 42#define jffs2_init_acl_post(inode) (0)
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 6f60cc910f4c..7aa4417e085f 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -55,7 +55,7 @@ const struct inode_operations jffs2_dir_inode_operations =
55 .rmdir = jffs2_rmdir, 55 .rmdir = jffs2_rmdir,
56 .mknod = jffs2_mknod, 56 .mknod = jffs2_mknod,
57 .rename = jffs2_rename, 57 .rename = jffs2_rename,
58 .permission = jffs2_permission, 58 .check_acl = jffs2_check_acl,
59 .setattr = jffs2_setattr, 59 .setattr = jffs2_setattr,
60 .setxattr = jffs2_setxattr, 60 .setxattr = jffs2_setxattr,
61 .getxattr = jffs2_getxattr, 61 .getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 23c947539864..b7b74e299142 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -56,7 +56,7 @@ const struct file_operations jffs2_file_operations =
56 56
57const struct inode_operations jffs2_file_inode_operations = 57const struct inode_operations jffs2_file_inode_operations =
58{ 58{
59 .permission = jffs2_permission, 59 .check_acl = jffs2_check_acl,
60 .setattr = jffs2_setattr, 60 .setattr = jffs2_setattr,
61 .setxattr = jffs2_setxattr, 61 .setxattr = jffs2_setxattr,
62 .getxattr = jffs2_getxattr, 62 .getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index b7339c3b6ad9..4ec11e8bda8c 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -21,7 +21,7 @@ const struct inode_operations jffs2_symlink_inode_operations =
21{ 21{
22 .readlink = generic_readlink, 22 .readlink = generic_readlink,
23 .follow_link = jffs2_follow_link, 23 .follow_link = jffs2_follow_link,
24 .permission = jffs2_permission, 24 .check_acl = jffs2_check_acl,
25 .setattr = jffs2_setattr, 25 .setattr = jffs2_setattr,
26 .setxattr = jffs2_setxattr, 26 .setxattr = jffs2_setxattr,
27 .getxattr = jffs2_getxattr, 27 .getxattr = jffs2_getxattr,
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index a29c7c3e3fb8..d66477c34306 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,7 +114,7 @@ out:
114 return rc; 114 return rc;
115} 115}
116 116
117static int jfs_check_acl(struct inode *inode, int mask) 117int jfs_check_acl(struct inode *inode, int mask)
118{ 118{
119 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 119 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
120 120
@@ -129,11 +129,6 @@ static int jfs_check_acl(struct inode *inode, int mask)
129 return -EAGAIN; 129 return -EAGAIN;
130} 130}
131 131
132int jfs_permission(struct inode *inode, int mask)
133{
134 return generic_permission(inode, mask, jfs_check_acl);
135}
136
137int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) 132int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
138{ 133{
139 struct posix_acl *acl = NULL; 134 struct posix_acl *acl = NULL;
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 7f6063acaa3b..2b70fa78e4a7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -96,7 +96,7 @@ const struct inode_operations jfs_file_inode_operations = {
96 .removexattr = jfs_removexattr, 96 .removexattr = jfs_removexattr,
97#ifdef CONFIG_JFS_POSIX_ACL 97#ifdef CONFIG_JFS_POSIX_ACL
98 .setattr = jfs_setattr, 98 .setattr = jfs_setattr,
99 .permission = jfs_permission, 99 .check_acl = jfs_check_acl,
100#endif 100#endif
101}; 101};
102 102
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 88475f10a389..b07bd417ef85 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23int jfs_permission(struct inode *, int); 23int jfs_check_acl(struct inode *, int);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_setattr(struct dentry *, struct iattr *); 25int jfs_setattr(struct dentry *, struct iattr *);
26 26
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 514ee2edb92a..c79a4270f083 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1543,7 +1543,7 @@ const struct inode_operations jfs_dir_inode_operations = {
1543 .removexattr = jfs_removexattr, 1543 .removexattr = jfs_removexattr,
1544#ifdef CONFIG_JFS_POSIX_ACL 1544#ifdef CONFIG_JFS_POSIX_ACL
1545 .setattr = jfs_setattr, 1545 .setattr = jfs_setattr,
1546 .permission = jfs_permission, 1546 .check_acl = jfs_check_acl,
1547#endif 1547#endif
1548}; 1548};
1549 1549
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 99d737bd4325..7cb076ac6b45 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -87,18 +87,6 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap)
87 return hash & (NLM_HOST_NRHASH - 1); 87 return hash & (NLM_HOST_NRHASH - 1);
88} 88}
89 89
90static void nlm_clear_port(struct sockaddr *sap)
91{
92 switch (sap->sa_family) {
93 case AF_INET:
94 ((struct sockaddr_in *)sap)->sin_port = 0;
95 break;
96 case AF_INET6:
97 ((struct sockaddr_in6 *)sap)->sin6_port = 0;
98 break;
99 }
100}
101
102/* 90/*
103 * Common host lookup routine for server & client 91 * Common host lookup routine for server & client
104 */ 92 */
@@ -177,7 +165,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
177 host->h_addrbuf = nsm->sm_addrbuf; 165 host->h_addrbuf = nsm->sm_addrbuf;
178 memcpy(nlm_addr(host), ni->sap, ni->salen); 166 memcpy(nlm_addr(host), ni->sap, ni->salen);
179 host->h_addrlen = ni->salen; 167 host->h_addrlen = ni->salen;
180 nlm_clear_port(nlm_addr(host)); 168 rpc_set_port(nlm_addr(host), 0);
181 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); 169 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
182 host->h_version = ni->version; 170 host->h_version = ni->version;
183 host->h_proto = ni->protocol; 171 host->h_proto = ni->protocol;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 7fce1b525849..30c933188dd7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,43 +61,6 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
61 return (struct sockaddr *)&nsm->sm_addr; 61 return (struct sockaddr *)&nsm->sm_addr;
62} 62}
63 63
64static void nsm_display_ipv4_address(const struct sockaddr *sap, char *buf,
65 const size_t len)
66{
67 const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
68 snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr);
69}
70
71static void nsm_display_ipv6_address(const struct sockaddr *sap, char *buf,
72 const size_t len)
73{
74 const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
75
76 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
77 snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]);
78 else if (sin6->sin6_scope_id != 0)
79 snprintf(buf, len, "%pI6%%%u", &sin6->sin6_addr,
80 sin6->sin6_scope_id);
81 else
82 snprintf(buf, len, "%pI6", &sin6->sin6_addr);
83}
84
85static void nsm_display_address(const struct sockaddr *sap,
86 char *buf, const size_t len)
87{
88 switch (sap->sa_family) {
89 case AF_INET:
90 nsm_display_ipv4_address(sap, buf, len);
91 break;
92 case AF_INET6:
93 nsm_display_ipv6_address(sap, buf, len);
94 break;
95 default:
96 snprintf(buf, len, "unsupported address family");
97 break;
98 }
99}
100
101static struct rpc_clnt *nsm_create(void) 64static struct rpc_clnt *nsm_create(void)
102{ 65{
103 struct sockaddr_in sin = { 66 struct sockaddr_in sin = {
@@ -307,8 +270,11 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
307 memcpy(nsm_addr(new), sap, salen); 270 memcpy(nsm_addr(new), sap, salen);
308 new->sm_addrlen = salen; 271 new->sm_addrlen = salen;
309 nsm_init_private(new); 272 nsm_init_private(new);
310 nsm_display_address((const struct sockaddr *)&new->sm_addr, 273
311 new->sm_addrbuf, sizeof(new->sm_addrbuf)); 274 if (rpc_ntop(nsm_addr(new), new->sm_addrbuf,
275 sizeof(new->sm_addrbuf)) == 0)
276 (void)snprintf(new->sm_addrbuf, sizeof(new->sm_addrbuf),
277 "unsupported address family");
312 memcpy(new->sm_name, hostname, hostname_len); 278 memcpy(new->sm_name, hostname, hostname_len);
313 new->sm_name[hostname_len] = '\0'; 279 new->sm_name[hostname_len] = '\0';
314 280
diff --git a/fs/locks.c b/fs/locks.c
index b6440f52178f..19ee18a6829b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -768,7 +768,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
768 * give it the opportunity to lock the file. 768 * give it the opportunity to lock the file.
769 */ 769 */
770 if (found) 770 if (found)
771 cond_resched_bkl(); 771 cond_resched();
772 772
773find_conflict: 773find_conflict:
774 for_each_lock(inode, before) { 774 for_each_lock(inode, before) {
@@ -1591,7 +1591,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1591 if (can_sleep) 1591 if (can_sleep)
1592 lock->fl_flags |= FL_SLEEP; 1592 lock->fl_flags |= FL_SLEEP;
1593 1593
1594 error = security_file_lock(filp, cmd); 1594 error = security_file_lock(filp, lock->fl_type);
1595 if (error) 1595 if (error)
1596 goto out_free; 1596 goto out_free;
1597 1597
diff --git a/fs/namei.c b/fs/namei.c
index f3c5b278895a..d11f404667e9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,19 +169,10 @@ void putname(const char *name)
169EXPORT_SYMBOL(putname); 169EXPORT_SYMBOL(putname);
170#endif 170#endif
171 171
172 172/*
173/** 173 * This does basic POSIX ACL permission checking
174 * generic_permission - check for access rights on a Posix-like filesystem
175 * @inode: inode to check access rights for
176 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
177 * @check_acl: optional callback to check for Posix ACLs
178 *
179 * Used to check for read/write/execute permissions on a file.
180 * We use "fsuid" for this, letting us set arbitrary permissions
181 * for filesystem access without changing the "normal" uids which
182 * are used for other things..
183 */ 174 */
184int generic_permission(struct inode *inode, int mask, 175static int acl_permission_check(struct inode *inode, int mask,
185 int (*check_acl)(struct inode *inode, int mask)) 176 int (*check_acl)(struct inode *inode, int mask))
186{ 177{
187 umode_t mode = inode->i_mode; 178 umode_t mode = inode->i_mode;
@@ -193,9 +184,7 @@ int generic_permission(struct inode *inode, int mask,
193 else { 184 else {
194 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 185 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
195 int error = check_acl(inode, mask); 186 int error = check_acl(inode, mask);
196 if (error == -EACCES) 187 if (error != -EAGAIN)
197 goto check_capabilities;
198 else if (error != -EAGAIN)
199 return error; 188 return error;
200 } 189 }
201 190
@@ -208,8 +197,32 @@ int generic_permission(struct inode *inode, int mask,
208 */ 197 */
209 if ((mask & ~mode) == 0) 198 if ((mask & ~mode) == 0)
210 return 0; 199 return 0;
200 return -EACCES;
201}
202
203/**
204 * generic_permission - check for access rights on a Posix-like filesystem
205 * @inode: inode to check access rights for
206 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
207 * @check_acl: optional callback to check for Posix ACLs
208 *
209 * Used to check for read/write/execute permissions on a file.
210 * We use "fsuid" for this, letting us set arbitrary permissions
211 * for filesystem access without changing the "normal" uids which
212 * are used for other things..
213 */
214int generic_permission(struct inode *inode, int mask,
215 int (*check_acl)(struct inode *inode, int mask))
216{
217 int ret;
218
219 /*
220 * Do the basic POSIX ACL permission checks.
221 */
222 ret = acl_permission_check(inode, mask, check_acl);
223 if (ret != -EACCES)
224 return ret;
211 225
212 check_capabilities:
213 /* 226 /*
214 * Read/write DACs are always overridable. 227 * Read/write DACs are always overridable.
215 * Executable DACs are overridable if at least one exec bit is set. 228 * Executable DACs are overridable if at least one exec bit is set.
@@ -262,7 +275,7 @@ int inode_permission(struct inode *inode, int mask)
262 if (inode->i_op->permission) 275 if (inode->i_op->permission)
263 retval = inode->i_op->permission(inode, mask); 276 retval = inode->i_op->permission(inode, mask);
264 else 277 else
265 retval = generic_permission(inode, mask, NULL); 278 retval = generic_permission(inode, mask, inode->i_op->check_acl);
266 279
267 if (retval) 280 if (retval)
268 return retval; 281 return retval;
@@ -432,29 +445,22 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
432 */ 445 */
433static int exec_permission_lite(struct inode *inode) 446static int exec_permission_lite(struct inode *inode)
434{ 447{
435 umode_t mode = inode->i_mode; 448 int ret;
436 449
437 if (inode->i_op->permission) 450 if (inode->i_op->permission) {
438 return -EAGAIN; 451 ret = inode->i_op->permission(inode, MAY_EXEC);
439 452 if (!ret)
440 if (current_fsuid() == inode->i_uid) 453 goto ok;
441 mode >>= 6; 454 return ret;
442 else if (in_group_p(inode->i_gid)) 455 }
443 mode >>= 3; 456 ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl);
444 457 if (!ret)
445 if (mode & MAY_EXEC)
446 goto ok;
447
448 if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
449 goto ok;
450
451 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
452 goto ok; 458 goto ok;
453 459
454 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) 460 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
455 goto ok; 461 goto ok;
456 462
457 return -EACCES; 463 return ret;
458ok: 464ok:
459 return security_inode_permission(inode, MAY_EXEC); 465 return security_inode_permission(inode, MAY_EXEC);
460} 466}
@@ -853,12 +859,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
853 859
854 nd->flags |= LOOKUP_CONTINUE; 860 nd->flags |= LOOKUP_CONTINUE;
855 err = exec_permission_lite(inode); 861 err = exec_permission_lite(inode);
856 if (err == -EAGAIN)
857 err = inode_permission(nd->path.dentry->d_inode,
858 MAY_EXEC);
859 if (!err)
860 err = ima_path_check(&nd->path, MAY_EXEC,
861 IMA_COUNT_UPDATE);
862 if (err) 862 if (err)
863 break; 863 break;
864 864
@@ -1533,37 +1533,42 @@ int may_open(struct path *path, int acc_mode, int flag)
1533 if (error) 1533 if (error)
1534 return error; 1534 return error;
1535 1535
1536 error = ima_path_check(path, 1536 error = ima_path_check(path, acc_mode ?
1537 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC), 1537 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
1538 ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
1538 IMA_COUNT_UPDATE); 1539 IMA_COUNT_UPDATE);
1540
1539 if (error) 1541 if (error)
1540 return error; 1542 return error;
1541 /* 1543 /*
1542 * An append-only file must be opened in append mode for writing. 1544 * An append-only file must be opened in append mode for writing.
1543 */ 1545 */
1544 if (IS_APPEND(inode)) { 1546 if (IS_APPEND(inode)) {
1547 error = -EPERM;
1545 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1548 if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1546 return -EPERM; 1549 goto err_out;
1547 if (flag & O_TRUNC) 1550 if (flag & O_TRUNC)
1548 return -EPERM; 1551 goto err_out;
1549 } 1552 }
1550 1553
1551 /* O_NOATIME can only be set by the owner or superuser */ 1554 /* O_NOATIME can only be set by the owner or superuser */
1552 if (flag & O_NOATIME) 1555 if (flag & O_NOATIME)
1553 if (!is_owner_or_cap(inode)) 1556 if (!is_owner_or_cap(inode)) {
1554 return -EPERM; 1557 error = -EPERM;
1558 goto err_out;
1559 }
1555 1560
1556 /* 1561 /*
1557 * Ensure there are no outstanding leases on the file. 1562 * Ensure there are no outstanding leases on the file.
1558 */ 1563 */
1559 error = break_lease(inode, flag); 1564 error = break_lease(inode, flag);
1560 if (error) 1565 if (error)
1561 return error; 1566 goto err_out;
1562 1567
1563 if (flag & O_TRUNC) { 1568 if (flag & O_TRUNC) {
1564 error = get_write_access(inode); 1569 error = get_write_access(inode);
1565 if (error) 1570 if (error)
1566 return error; 1571 goto err_out;
1567 1572
1568 /* 1573 /*
1569 * Refuse to truncate files with mandatory locks held on them. 1574 * Refuse to truncate files with mandatory locks held on them.
@@ -1581,12 +1586,17 @@ int may_open(struct path *path, int acc_mode, int flag)
1581 } 1586 }
1582 put_write_access(inode); 1587 put_write_access(inode);
1583 if (error) 1588 if (error)
1584 return error; 1589 goto err_out;
1585 } else 1590 } else
1586 if (flag & FMODE_WRITE) 1591 if (flag & FMODE_WRITE)
1587 vfs_dq_init(inode); 1592 vfs_dq_init(inode);
1588 1593
1589 return 0; 1594 return 0;
1595err_out:
1596 ima_counts_put(path, acc_mode ?
1597 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
1598 ACC_MODE(flag) & (MAY_READ | MAY_WRITE));
1599 return error;
1590} 1600}
1591 1601
1592/* 1602/*
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 845159814de2..da7fda639eac 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,7 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \ 8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \
9 write.o namespace.o mount_clnt.o 9 write.o namespace.o mount_clnt.o \
10 dns_resolve.o cache_lib.o
10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o 11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 12nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 13nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
new file mode 100644
index 000000000000..b4ffd0146ea6
--- /dev/null
+++ b/fs/nfs/cache_lib.c
@@ -0,0 +1,140 @@
1/*
2 * linux/fs/nfs/cache_lib.c
3 *
4 * Helper routines for the NFS client caches
5 *
6 * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
7 */
8#include <linux/kmod.h>
9#include <linux/module.h>
10#include <linux/moduleparam.h>
11#include <linux/mount.h>
12#include <linux/namei.h>
13#include <linux/sunrpc/cache.h>
14#include <linux/sunrpc/rpc_pipe_fs.h>
15
16#include "cache_lib.h"
17
18#define NFS_CACHE_UPCALL_PATHLEN 256
19#define NFS_CACHE_UPCALL_TIMEOUT 15
20
21static char nfs_cache_getent_prog[NFS_CACHE_UPCALL_PATHLEN] =
22 "/sbin/nfs_cache_getent";
23static unsigned long nfs_cache_getent_timeout = NFS_CACHE_UPCALL_TIMEOUT;
24
25module_param_string(cache_getent, nfs_cache_getent_prog,
26 sizeof(nfs_cache_getent_prog), 0600);
27MODULE_PARM_DESC(cache_getent, "Path to the client cache upcall program");
28module_param_named(cache_getent_timeout, nfs_cache_getent_timeout, ulong, 0600);
29MODULE_PARM_DESC(cache_getent_timeout, "Timeout (in seconds) after which "
30 "the cache upcall is assumed to have failed");
31
32int nfs_cache_upcall(struct cache_detail *cd, char *entry_name)
33{
34 static char *envp[] = { "HOME=/",
35 "TERM=linux",
36 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
37 NULL
38 };
39 char *argv[] = {
40 nfs_cache_getent_prog,
41 cd->name,
42 entry_name,
43 NULL
44 };
45 int ret = -EACCES;
46
47 if (nfs_cache_getent_prog[0] == '\0')
48 goto out;
49 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
50 /*
51 * Disable the upcall mechanism if we're getting an ENOENT or
52 * EACCES error. The admin can re-enable it on the fly by using
53 * sysfs to set the 'cache_getent' parameter once the problem
54 * has been fixed.
55 */
56 if (ret == -ENOENT || ret == -EACCES)
57 nfs_cache_getent_prog[0] = '\0';
58out:
59 return ret > 0 ? 0 : ret;
60}
61
62/*
63 * Deferred request handling
64 */
65void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq)
66{
67 if (atomic_dec_and_test(&dreq->count))
68 kfree(dreq);
69}
70
71static void nfs_dns_cache_revisit(struct cache_deferred_req *d, int toomany)
72{
73 struct nfs_cache_defer_req *dreq;
74
75 dreq = container_of(d, struct nfs_cache_defer_req, deferred_req);
76
77 complete_all(&dreq->completion);
78 nfs_cache_defer_req_put(dreq);
79}
80
81static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req)
82{
83 struct nfs_cache_defer_req *dreq;
84
85 dreq = container_of(req, struct nfs_cache_defer_req, req);
86 dreq->deferred_req.revisit = nfs_dns_cache_revisit;
87 atomic_inc(&dreq->count);
88
89 return &dreq->deferred_req;
90}
91
92struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void)
93{
94 struct nfs_cache_defer_req *dreq;
95
96 dreq = kzalloc(sizeof(*dreq), GFP_KERNEL);
97 if (dreq) {
98 init_completion(&dreq->completion);
99 atomic_set(&dreq->count, 1);
100 dreq->req.defer = nfs_dns_cache_defer;
101 }
102 return dreq;
103}
104
105int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
106{
107 if (wait_for_completion_timeout(&dreq->completion,
108 nfs_cache_getent_timeout * HZ) == 0)
109 return -ETIMEDOUT;
110 return 0;
111}
112
113int nfs_cache_register(struct cache_detail *cd)
114{
115 struct nameidata nd;
116 struct vfsmount *mnt;
117 int ret;
118
119 mnt = rpc_get_mount();
120 if (IS_ERR(mnt))
121 return PTR_ERR(mnt);
122 ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd);
123 if (ret)
124 goto err;
125 ret = sunrpc_cache_register_pipefs(nd.path.dentry,
126 cd->name, 0600, cd);
127 path_put(&nd.path);
128 if (!ret)
129 return ret;
130err:
131 rpc_put_mount();
132 return ret;
133}
134
/*
 * Undo nfs_cache_register(): unregister @cd, then release the rpc
 * mount reference.
 */
void nfs_cache_unregister(struct cache_detail *cd)
{
	sunrpc_cache_unregister_pipefs(cd);
	rpc_put_mount();
}
140
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
new file mode 100644
index 000000000000..76f856e284e4
--- /dev/null
+++ b/fs/nfs/cache_lib.h
@@ -0,0 +1,27 @@
1/*
2 * Helper routines for the NFS client caches
3 *
4 * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
5 */
6
7#include <linux/completion.h>
8#include <linux/sunrpc/cache.h>
9#include <asm/atomic.h>
10
11/*
12 * Deferred request handling
13 */
14struct nfs_cache_defer_req {
15 struct cache_req req;
16 struct cache_deferred_req deferred_req;
17 struct completion completion;
18 atomic_t count;
19};
20
21extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name);
22extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
23extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
24extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
25
26extern int nfs_cache_register(struct cache_detail *cd);
27extern void nfs_cache_unregister(struct cache_detail *cd);
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 7f604c7941fb..293fa0528a6e 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -43,21 +43,29 @@ static struct svc_program nfs4_callback_program;
43unsigned int nfs_callback_set_tcpport; 43unsigned int nfs_callback_set_tcpport;
44unsigned short nfs_callback_tcpport; 44unsigned short nfs_callback_tcpport;
45unsigned short nfs_callback_tcpport6; 45unsigned short nfs_callback_tcpport6;
46static const int nfs_set_port_min = 0; 46#define NFS_CALLBACK_MAXPORTNR (65535U)
47static const int nfs_set_port_max = 65535;
48 47
49static int param_set_port(const char *val, struct kernel_param *kp) 48static int param_set_portnr(const char *val, struct kernel_param *kp)
50{ 49{
51 char *endp; 50 unsigned long num;
52 int num = simple_strtol(val, &endp, 0); 51 int ret;
53 if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) 52
53 if (!val)
54 return -EINVAL;
55 ret = strict_strtoul(val, 0, &num);
56 if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR)
54 return -EINVAL; 57 return -EINVAL;
55 *((int *)kp->arg) = num; 58 *((unsigned int *)kp->arg) = num;
56 return 0; 59 return 0;
57} 60}
58 61
59module_param_call(callback_tcpport, param_set_port, param_get_int, 62static int param_get_portnr(char *buffer, struct kernel_param *kp)
60 &nfs_callback_set_tcpport, 0644); 63{
64 return param_get_uint(buffer, kp);
65}
66#define param_check_portnr(name, p) __param_check(name, p, unsigned int);
67
68module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
61 69
62/* 70/*
63 * This is the NFSv4 callback kernel thread. 71 * This is the NFSv4 callback kernel thread.
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8d25ccb2d51d..e350bd6a2334 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -809,6 +809,9 @@ static int nfs_init_server(struct nfs_server *server,
809 /* Initialise the client representation from the mount data */ 809 /* Initialise the client representation from the mount data */
810 server->flags = data->flags; 810 server->flags = data->flags;
811 server->options = data->options; 811 server->options = data->options;
812 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
813 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
814 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
812 815
813 if (data->rsize) 816 if (data->rsize)
814 server->rsize = nfs_block_size(data->rsize, NULL); 817 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -879,6 +882,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
879 server->rsize = NFS_MAX_FILE_IO_SIZE; 882 server->rsize = NFS_MAX_FILE_IO_SIZE;
880 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 883 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
881 884
885 server->backing_dev_info.name = "nfs";
882 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; 886 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
883 887
884 if (server->wsize > max_rpc_payload) 888 if (server->wsize > max_rpc_payload)
@@ -1074,10 +1078,6 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1074 (unsigned long long) server->fsid.major, 1078 (unsigned long long) server->fsid.major,
1075 (unsigned long long) server->fsid.minor); 1079 (unsigned long long) server->fsid.minor);
1076 1080
1077 BUG_ON(!server->nfs_client);
1078 BUG_ON(!server->nfs_client->rpc_ops);
1079 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1080
1081 spin_lock(&nfs_client_lock); 1081 spin_lock(&nfs_client_lock);
1082 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); 1082 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1083 list_add_tail(&server->master_link, &nfs_volume_list); 1083 list_add_tail(&server->master_link, &nfs_volume_list);
@@ -1274,7 +1274,7 @@ static int nfs4_init_server(struct nfs_server *server,
1274 1274
1275 /* Initialise the client representation from the mount data */ 1275 /* Initialise the client representation from the mount data */
1276 server->flags = data->flags; 1276 server->flags = data->flags;
1277 server->caps |= NFS_CAP_ATOMIC_OPEN; 1277 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
1278 server->options = data->options; 1278 server->options = data->options;
1279 1279
1280 /* Get a client record */ 1280 /* Get a client record */
@@ -1359,10 +1359,6 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1359 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) 1359 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1360 server->namelen = NFS4_MAXNAMLEN; 1360 server->namelen = NFS4_MAXNAMLEN;
1361 1361
1362 BUG_ON(!server->nfs_client);
1363 BUG_ON(!server->nfs_client->rpc_ops);
1364 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1365
1366 spin_lock(&nfs_client_lock); 1362 spin_lock(&nfs_client_lock);
1367 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); 1363 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1368 list_add_tail(&server->master_link, &nfs_volume_list); 1364 list_add_tail(&server->master_link, &nfs_volume_list);
@@ -1400,7 +1396,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1400 1396
1401 /* Initialise the client representation from the parent server */ 1397 /* Initialise the client representation from the parent server */
1402 nfs_server_copy_userdata(server, parent_server); 1398 nfs_server_copy_userdata(server, parent_server);
1403 server->caps |= NFS_CAP_ATOMIC_OPEN; 1399 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
1404 1400
1405 /* Get a client representation. 1401 /* Get a client representation.
1406 * Note: NFSv4 always uses TCP, */ 1402 * Note: NFSv4 always uses TCP, */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e4e089a8f294..6c3210099d51 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -934,9 +934,6 @@ out:
934 * back into its cache. We let the server do generic write 934 * back into its cache. We let the server do generic write
935 * parameter checking and report problems. 935 * parameter checking and report problems.
936 * 936 *
937 * We also avoid an unnecessary invocation of generic_osync_inode(),
938 * as it is fairly meaningless to sync the metadata of an NFS file.
939 *
940 * We eliminate local atime updates, see direct read above. 937 * We eliminate local atime updates, see direct read above.
941 * 938 *
942 * We avoid unnecessary page cache invalidations for normal cached 939 * We avoid unnecessary page cache invalidations for normal cached
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
new file mode 100644
index 000000000000..f4d54ba97cc6
--- /dev/null
+++ b/fs/nfs/dns_resolve.c
@@ -0,0 +1,335 @@
1/*
2 * linux/fs/nfs/dns_resolve.c
3 *
4 * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
5 *
6 * Resolves DNS hostnames into valid ip addresses
7 */
8
9#include <linux/hash.h>
10#include <linux/string.h>
11#include <linux/kmod.h>
12#include <linux/module.h>
13#include <linux/socket.h>
14#include <linux/seq_file.h>
15#include <linux/inet.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/sunrpc/cache.h>
18#include <linux/sunrpc/svcauth.h>
19
20#include "dns_resolve.h"
21#include "cache_lib.h"
22
23#define NFS_DNS_HASHBITS 4
24#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS)
25
26static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE];
27
28struct nfs_dns_ent {
29 struct cache_head h;
30
31 char *hostname;
32 size_t namelen;
33
34 struct sockaddr_storage addr;
35 size_t addrlen;
36};
37
38
39static void nfs_dns_ent_init(struct cache_head *cnew,
40 struct cache_head *ckey)
41{
42 struct nfs_dns_ent *new;
43 struct nfs_dns_ent *key;
44
45 new = container_of(cnew, struct nfs_dns_ent, h);
46 key = container_of(ckey, struct nfs_dns_ent, h);
47
48 kfree(new->hostname);
49 new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL);
50 if (new->hostname) {
51 new->namelen = key->namelen;
52 memcpy(&new->addr, &key->addr, key->addrlen);
53 new->addrlen = key->addrlen;
54 } else {
55 new->namelen = 0;
56 new->addrlen = 0;
57 }
58}
59
60static void nfs_dns_ent_put(struct kref *ref)
61{
62 struct nfs_dns_ent *item;
63
64 item = container_of(ref, struct nfs_dns_ent, h.ref);
65 kfree(item->hostname);
66 kfree(item);
67}
68
69static struct cache_head *nfs_dns_ent_alloc(void)
70{
71 struct nfs_dns_ent *item = kmalloc(sizeof(*item), GFP_KERNEL);
72
73 if (item != NULL) {
74 item->hostname = NULL;
75 item->namelen = 0;
76 item->addrlen = 0;
77 return &item->h;
78 }
79 return NULL;
80};
81
82static unsigned int nfs_dns_hash(const struct nfs_dns_ent *key)
83{
84 return hash_str(key->hostname, NFS_DNS_HASHBITS);
85}
86
87static void nfs_dns_request(struct cache_detail *cd,
88 struct cache_head *ch,
89 char **bpp, int *blen)
90{
91 struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
92
93 qword_add(bpp, blen, key->hostname);
94 (*bpp)[-1] = '\n';
95}
96
97static int nfs_dns_upcall(struct cache_detail *cd,
98 struct cache_head *ch)
99{
100 struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
101 int ret;
102
103 ret = nfs_cache_upcall(cd, key->hostname);
104 if (ret)
105 ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request);
106 return ret;
107}
108
109static int nfs_dns_match(struct cache_head *ca,
110 struct cache_head *cb)
111{
112 struct nfs_dns_ent *a;
113 struct nfs_dns_ent *b;
114
115 a = container_of(ca, struct nfs_dns_ent, h);
116 b = container_of(cb, struct nfs_dns_ent, h);
117
118 if (a->namelen == 0 || a->namelen != b->namelen)
119 return 0;
120 return memcmp(a->hostname, b->hostname, a->namelen) == 0;
121}
122
123static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
124 struct cache_head *h)
125{
126 struct nfs_dns_ent *item;
127 long ttl;
128
129 if (h == NULL) {
130 seq_puts(m, "# ip address hostname ttl\n");
131 return 0;
132 }
133 item = container_of(h, struct nfs_dns_ent, h);
134 ttl = (long)item->h.expiry_time - (long)get_seconds();
135 if (ttl < 0)
136 ttl = 0;
137
138 if (!test_bit(CACHE_NEGATIVE, &h->flags)) {
139 char buf[INET6_ADDRSTRLEN+IPV6_SCOPE_ID_LEN+1];
140
141 rpc_ntop((struct sockaddr *)&item->addr, buf, sizeof(buf));
142 seq_printf(m, "%15s ", buf);
143 } else
144 seq_puts(m, "<none> ");
145 seq_printf(m, "%15s %ld\n", item->hostname, ttl);
146 return 0;
147}
148
149struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd,
150 struct nfs_dns_ent *key)
151{
152 struct cache_head *ch;
153
154 ch = sunrpc_cache_lookup(cd,
155 &key->h,
156 nfs_dns_hash(key));
157 if (!ch)
158 return NULL;
159 return container_of(ch, struct nfs_dns_ent, h);
160}
161
162struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd,
163 struct nfs_dns_ent *new,
164 struct nfs_dns_ent *key)
165{
166 struct cache_head *ch;
167
168 ch = sunrpc_cache_update(cd,
169 &new->h, &key->h,
170 nfs_dns_hash(key));
171 if (!ch)
172 return NULL;
173 return container_of(ch, struct nfs_dns_ent, h);
174}
175
176static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
177{
178 char buf1[NFS_DNS_HOSTNAME_MAXLEN+1];
179 struct nfs_dns_ent key, *item;
180 unsigned long ttl;
181 ssize_t len;
182 int ret = -EINVAL;
183
184 if (buf[buflen-1] != '\n')
185 goto out;
186 buf[buflen-1] = '\0';
187
188 len = qword_get(&buf, buf1, sizeof(buf1));
189 if (len <= 0)
190 goto out;
191 key.addrlen = rpc_pton(buf1, len,
192 (struct sockaddr *)&key.addr,
193 sizeof(key.addr));
194
195 len = qword_get(&buf, buf1, sizeof(buf1));
196 if (len <= 0)
197 goto out;
198
199 key.hostname = buf1;
200 key.namelen = len;
201 memset(&key.h, 0, sizeof(key.h));
202
203 ttl = get_expiry(&buf);
204 if (ttl == 0)
205 goto out;
206 key.h.expiry_time = ttl + get_seconds();
207
208 ret = -ENOMEM;
209 item = nfs_dns_lookup(cd, &key);
210 if (item == NULL)
211 goto out;
212
213 if (key.addrlen == 0)
214 set_bit(CACHE_NEGATIVE, &key.h.flags);
215
216 item = nfs_dns_update(cd, &key, item);
217 if (item == NULL)
218 goto out;
219
220 ret = 0;
221 cache_put(&item->h, cd);
222out:
223 return ret;
224}
225
226static struct cache_detail nfs_dns_resolve = {
227 .owner = THIS_MODULE,
228 .hash_size = NFS_DNS_HASHTBL_SIZE,
229 .hash_table = nfs_dns_table,
230 .name = "dns_resolve",
231 .cache_put = nfs_dns_ent_put,
232 .cache_upcall = nfs_dns_upcall,
233 .cache_parse = nfs_dns_parse,
234 .cache_show = nfs_dns_show,
235 .match = nfs_dns_match,
236 .init = nfs_dns_ent_init,
237 .update = nfs_dns_ent_init,
238 .alloc = nfs_dns_ent_alloc,
239};
240
241static int do_cache_lookup(struct cache_detail *cd,
242 struct nfs_dns_ent *key,
243 struct nfs_dns_ent **item,
244 struct nfs_cache_defer_req *dreq)
245{
246 int ret = -ENOMEM;
247
248 *item = nfs_dns_lookup(cd, key);
249 if (*item) {
250 ret = cache_check(cd, &(*item)->h, &dreq->req);
251 if (ret)
252 *item = NULL;
253 }
254 return ret;
255}
256
257static int do_cache_lookup_nowait(struct cache_detail *cd,
258 struct nfs_dns_ent *key,
259 struct nfs_dns_ent **item)
260{
261 int ret = -ENOMEM;
262
263 *item = nfs_dns_lookup(cd, key);
264 if (!*item)
265 goto out_err;
266 ret = -ETIMEDOUT;
267 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
268 || (*item)->h.expiry_time < get_seconds()
269 || cd->flush_time > (*item)->h.last_refresh)
270 goto out_put;
271 ret = -ENOENT;
272 if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
273 goto out_put;
274 return 0;
275out_put:
276 cache_put(&(*item)->h, cd);
277out_err:
278 *item = NULL;
279 return ret;
280}
281
282static int do_cache_lookup_wait(struct cache_detail *cd,
283 struct nfs_dns_ent *key,
284 struct nfs_dns_ent **item)
285{
286 struct nfs_cache_defer_req *dreq;
287 int ret = -ENOMEM;
288
289 dreq = nfs_cache_defer_req_alloc();
290 if (!dreq)
291 goto out;
292 ret = do_cache_lookup(cd, key, item, dreq);
293 if (ret == -EAGAIN) {
294 ret = nfs_cache_wait_for_upcall(dreq);
295 if (!ret)
296 ret = do_cache_lookup_nowait(cd, key, item);
297 }
298 nfs_cache_defer_req_put(dreq);
299out:
300 return ret;
301}
302
303ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
304 struct sockaddr *sa, size_t salen)
305{
306 struct nfs_dns_ent key = {
307 .hostname = name,
308 .namelen = namelen,
309 };
310 struct nfs_dns_ent *item = NULL;
311 ssize_t ret;
312
313 ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item);
314 if (ret == 0) {
315 if (salen >= item->addrlen) {
316 memcpy(sa, &item->addr, item->addrlen);
317 ret = item->addrlen;
318 } else
319 ret = -EOVERFLOW;
320 cache_put(&item->h, &nfs_dns_resolve);
321 } else if (ret == -ENOENT)
322 ret = -ESRCH;
323 return ret;
324}
325
326int nfs_dns_resolver_init(void)
327{
328 return nfs_cache_register(&nfs_dns_resolve);
329}
330
331void nfs_dns_resolver_destroy(void)
332{
333 nfs_cache_unregister(&nfs_dns_resolve);
334}
335
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
new file mode 100644
index 000000000000..a3f0938babf7
--- /dev/null
+++ b/fs/nfs/dns_resolve.h
@@ -0,0 +1,14 @@
1/*
2 * Resolve DNS hostnames into valid ip addresses
3 */
4#ifndef __LINUX_FS_NFS_DNS_RESOLVE_H
5#define __LINUX_FS_NFS_DNS_RESOLVE_H
6
7#define NFS_DNS_HOSTNAME_MAXLEN (128)
8
9extern int nfs_dns_resolver_init(void);
10extern void nfs_dns_resolver_destroy(void);
11extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
12 struct sockaddr *sa, size_t salen);
13
14#endif
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 05062329b678..5021b75d2d1e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -328,6 +328,42 @@ nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)
328} 328}
329 329
330/* 330/*
331 * Decide whether a read/modify/write cycle may be more efficient
332 * than a modify/write/read cycle when writing to a page in the
333 * page cache.
334 *
335 * The modify/write/read cycle may occur if a page is read before
336 * being completely filled by the writer. In this situation, the
337 * page must be completely written to stable storage on the server
338 * before it can be refilled by reading in the page from the server.
339 * This can lead to expensive, small, FILE_SYNC mode writes being
340 * done.
341 *
342 * It may be more efficient to read the page first if the file is
343 * open for reading in addition to writing, the page is not marked
344 * as Uptodate, it is not dirty or waiting to be committed,
345 * indicating that it was previously allocated and then modified,
346 * that there were valid bytes of data in that range of the file,
347 * and that the new data won't completely replace the old data in
348 * that range of the file.
349 */
350static int nfs_want_read_modify_write(struct file *file, struct page *page,
351 loff_t pos, unsigned len)
352{
353 unsigned int pglen = nfs_page_length(page);
354 unsigned int offset = pos & (PAGE_CACHE_SIZE - 1);
355 unsigned int end = offset + len;
356
357 if ((file->f_mode & FMODE_READ) && /* open for read? */
358 !PageUptodate(page) && /* Uptodate? */
359 !PagePrivate(page) && /* i/o request already? */
360 pglen && /* valid bytes of file? */
361 (end < pglen || offset)) /* replace all valid bytes? */
362 return 1;
363 return 0;
364}
365
366/*
331 * This does the "real" work of the write. We must allocate and lock the 367 * This does the "real" work of the write. We must allocate and lock the
332 * page to be sent back to the generic routine, which then copies the 368 * page to be sent back to the generic routine, which then copies the
333 * data from user space. 369 * data from user space.
@@ -340,15 +376,16 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
340 struct page **pagep, void **fsdata) 376 struct page **pagep, void **fsdata)
341{ 377{
342 int ret; 378 int ret;
343 pgoff_t index; 379 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
344 struct page *page; 380 struct page *page;
345 index = pos >> PAGE_CACHE_SHIFT; 381 int once_thru = 0;
346 382
347 dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", 383 dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
348 file->f_path.dentry->d_parent->d_name.name, 384 file->f_path.dentry->d_parent->d_name.name,
349 file->f_path.dentry->d_name.name, 385 file->f_path.dentry->d_name.name,
350 mapping->host->i_ino, len, (long long) pos); 386 mapping->host->i_ino, len, (long long) pos);
351 387
388start:
352 /* 389 /*
353 * Prevent starvation issues if someone is doing a consistency 390 * Prevent starvation issues if someone is doing a consistency
354 * sync-to-disk 391 * sync-to-disk
@@ -367,6 +404,13 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
367 if (ret) { 404 if (ret) {
368 unlock_page(page); 405 unlock_page(page);
369 page_cache_release(page); 406 page_cache_release(page);
407 } else if (!once_thru &&
408 nfs_want_read_modify_write(file, page, pos, len)) {
409 once_thru = 1;
410 ret = nfs_readpage(file, page);
411 page_cache_release(page);
412 if (!ret)
413 goto start;
370 } 414 }
371 return ret; 415 return ret;
372} 416}
@@ -479,6 +523,7 @@ const struct address_space_operations nfs_file_aops = {
479 .invalidatepage = nfs_invalidate_page, 523 .invalidatepage = nfs_invalidate_page,
480 .releasepage = nfs_release_page, 524 .releasepage = nfs_release_page,
481 .direct_IO = nfs_direct_IO, 525 .direct_IO = nfs_direct_IO,
526 .migratepage = nfs_migrate_page,
482 .launder_page = nfs_launder_page, 527 .launder_page = nfs_launder_page,
483}; 528};
484 529
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 86147b0ab2cf..21a84d45916f 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -101,7 +101,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
101 101
102static unsigned int fnvhash32(const void *, size_t); 102static unsigned int fnvhash32(const void *, size_t);
103 103
104static struct rpc_pipe_ops idmap_upcall_ops = { 104static const struct rpc_pipe_ops idmap_upcall_ops = {
105 .upcall = idmap_pipe_upcall, 105 .upcall = idmap_pipe_upcall,
106 .downcall = idmap_pipe_downcall, 106 .downcall = idmap_pipe_downcall,
107 .destroy_msg = idmap_pipe_destroy_msg, 107 .destroy_msg = idmap_pipe_destroy_msg,
@@ -119,8 +119,8 @@ nfs_idmap_new(struct nfs_client *clp)
119 if (idmap == NULL) 119 if (idmap == NULL)
120 return -ENOMEM; 120 return -ENOMEM;
121 121
122 idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", 122 idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry,
123 idmap, &idmap_upcall_ops, 0); 123 "idmap", idmap, &idmap_upcall_ops, 0);
124 if (IS_ERR(idmap->idmap_dentry)) { 124 if (IS_ERR(idmap->idmap_dentry)) {
125 error = PTR_ERR(idmap->idmap_dentry); 125 error = PTR_ERR(idmap->idmap_dentry);
126 kfree(idmap); 126 kfree(idmap);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd7938eda6a8..060022b4651c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -46,6 +46,7 @@
46#include "iostat.h" 46#include "iostat.h"
47#include "internal.h" 47#include "internal.h"
48#include "fscache.h" 48#include "fscache.h"
49#include "dns_resolve.h"
49 50
50#define NFSDBG_FACILITY NFSDBG_VFS 51#define NFSDBG_FACILITY NFSDBG_VFS
51 52
@@ -286,6 +287,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
286 /* We can't support update_atime(), since the server will reset it */ 287 /* We can't support update_atime(), since the server will reset it */
287 inode->i_flags |= S_NOATIME|S_NOCMTIME; 288 inode->i_flags |= S_NOATIME|S_NOCMTIME;
288 inode->i_mode = fattr->mode; 289 inode->i_mode = fattr->mode;
290 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
291 && nfs_server_capable(inode, NFS_CAP_MODE))
292 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
293 | NFS_INO_INVALID_ACCESS
294 | NFS_INO_INVALID_ACL;
289 /* Why so? Because we want revalidate for devices/FIFOs, and 295 /* Why so? Because we want revalidate for devices/FIFOs, and
290 * that's precisely what we have in nfs_file_inode_operations. 296 * that's precisely what we have in nfs_file_inode_operations.
291 */ 297 */
@@ -330,20 +336,46 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
330 nfsi->attr_gencount = fattr->gencount; 336 nfsi->attr_gencount = fattr->gencount;
331 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 337 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
332 inode->i_atime = fattr->atime; 338 inode->i_atime = fattr->atime;
339 else if (nfs_server_capable(inode, NFS_CAP_ATIME))
340 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
333 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 341 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
334 inode->i_mtime = fattr->mtime; 342 inode->i_mtime = fattr->mtime;
343 else if (nfs_server_capable(inode, NFS_CAP_MTIME))
344 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
345 | NFS_INO_INVALID_DATA;
335 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 346 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
336 inode->i_ctime = fattr->ctime; 347 inode->i_ctime = fattr->ctime;
348 else if (nfs_server_capable(inode, NFS_CAP_CTIME))
349 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
350 | NFS_INO_INVALID_ACCESS
351 | NFS_INO_INVALID_ACL;
337 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 352 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
338 nfsi->change_attr = fattr->change_attr; 353 nfsi->change_attr = fattr->change_attr;
354 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
355 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
356 | NFS_INO_INVALID_DATA;
339 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 357 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
340 inode->i_size = nfs_size_to_loff_t(fattr->size); 358 inode->i_size = nfs_size_to_loff_t(fattr->size);
359 else
360 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
361 | NFS_INO_INVALID_DATA
362 | NFS_INO_REVAL_PAGECACHE;
341 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 363 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
342 inode->i_nlink = fattr->nlink; 364 inode->i_nlink = fattr->nlink;
365 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
366 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
343 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 367 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
344 inode->i_uid = fattr->uid; 368 inode->i_uid = fattr->uid;
369 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
370 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
371 | NFS_INO_INVALID_ACCESS
372 | NFS_INO_INVALID_ACL;
345 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 373 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
346 inode->i_gid = fattr->gid; 374 inode->i_gid = fattr->gid;
375 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
376 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
377 | NFS_INO_INVALID_ACCESS
378 | NFS_INO_INVALID_ACL;
347 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 379 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
348 inode->i_blocks = fattr->du.nfs2.blocks; 380 inode->i_blocks = fattr->du.nfs2.blocks;
349 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 381 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
@@ -1145,6 +1177,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1145 loff_t cur_isize, new_isize; 1177 loff_t cur_isize, new_isize;
1146 unsigned long invalid = 0; 1178 unsigned long invalid = 0;
1147 unsigned long now = jiffies; 1179 unsigned long now = jiffies;
1180 unsigned long save_cache_validity;
1148 1181
1149 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", 1182 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
1150 __func__, inode->i_sb->s_id, inode->i_ino, 1183 __func__, inode->i_sb->s_id, inode->i_ino,
@@ -1171,10 +1204,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1171 */ 1204 */
1172 nfsi->read_cache_jiffies = fattr->time_start; 1205 nfsi->read_cache_jiffies = fattr->time_start;
1173 1206
1174 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME))) 1207 save_cache_validity = nfsi->cache_validity;
1175 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR 1208 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
1176 | NFS_INO_INVALID_ATIME 1209 | NFS_INO_INVALID_ATIME
1177 | NFS_INO_REVAL_PAGECACHE); 1210 | NFS_INO_REVAL_FORCED
1211 | NFS_INO_REVAL_PAGECACHE);
1178 1212
1179 /* Do atomic weak cache consistency updates */ 1213 /* Do atomic weak cache consistency updates */
1180 nfs_wcc_update_inode(inode, fattr); 1214 nfs_wcc_update_inode(inode, fattr);
@@ -1189,7 +1223,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1189 nfs_force_lookup_revalidate(inode); 1223 nfs_force_lookup_revalidate(inode);
1190 nfsi->change_attr = fattr->change_attr; 1224 nfsi->change_attr = fattr->change_attr;
1191 } 1225 }
1192 } 1226 } else if (server->caps & NFS_CAP_CHANGE_ATTR)
1227 invalid |= save_cache_validity;
1193 1228
1194 if (fattr->valid & NFS_ATTR_FATTR_MTIME) { 1229 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1195 /* NFSv2/v3: Check if the mtime agrees */ 1230 /* NFSv2/v3: Check if the mtime agrees */
@@ -1201,7 +1236,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1201 nfs_force_lookup_revalidate(inode); 1236 nfs_force_lookup_revalidate(inode);
1202 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1237 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1203 } 1238 }
1204 } 1239 } else if (server->caps & NFS_CAP_MTIME)
1240 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1241 | NFS_INO_INVALID_DATA
1242 | NFS_INO_REVAL_PAGECACHE
1243 | NFS_INO_REVAL_FORCED);
1244
1205 if (fattr->valid & NFS_ATTR_FATTR_CTIME) { 1245 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1206 /* If ctime has changed we should definitely clear access+acl caches */ 1246 /* If ctime has changed we should definitely clear access+acl caches */
1207 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { 1247 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
@@ -1215,7 +1255,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1215 } 1255 }
1216 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 1256 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1217 } 1257 }
1218 } 1258 } else if (server->caps & NFS_CAP_CTIME)
1259 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1260 | NFS_INO_INVALID_ACCESS
1261 | NFS_INO_INVALID_ACL
1262 | NFS_INO_REVAL_FORCED);
1219 1263
1220 /* Check if our cached file size is stale */ 1264 /* Check if our cached file size is stale */
1221 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { 1265 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
@@ -1231,30 +1275,50 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1231 dprintk("NFS: isize change on server for file %s/%ld\n", 1275 dprintk("NFS: isize change on server for file %s/%ld\n",
1232 inode->i_sb->s_id, inode->i_ino); 1276 inode->i_sb->s_id, inode->i_ino);
1233 } 1277 }
1234 } 1278 } else
1279 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1280 | NFS_INO_REVAL_PAGECACHE
1281 | NFS_INO_REVAL_FORCED);
1235 1282
1236 1283
1237 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 1284 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
1238 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); 1285 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1286 else if (server->caps & NFS_CAP_ATIME)
1287 invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
1288 | NFS_INO_REVAL_FORCED);
1239 1289
1240 if (fattr->valid & NFS_ATTR_FATTR_MODE) { 1290 if (fattr->valid & NFS_ATTR_FATTR_MODE) {
1241 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { 1291 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
1242 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1292 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1243 inode->i_mode = fattr->mode; 1293 inode->i_mode = fattr->mode;
1244 } 1294 }
1245 } 1295 } else if (server->caps & NFS_CAP_MODE)
1296 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1297 | NFS_INO_INVALID_ACCESS
1298 | NFS_INO_INVALID_ACL
1299 | NFS_INO_REVAL_FORCED);
1300
1246 if (fattr->valid & NFS_ATTR_FATTR_OWNER) { 1301 if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
1247 if (inode->i_uid != fattr->uid) { 1302 if (inode->i_uid != fattr->uid) {
1248 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1303 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1249 inode->i_uid = fattr->uid; 1304 inode->i_uid = fattr->uid;
1250 } 1305 }
1251 } 1306 } else if (server->caps & NFS_CAP_OWNER)
1307 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1308 | NFS_INO_INVALID_ACCESS
1309 | NFS_INO_INVALID_ACL
1310 | NFS_INO_REVAL_FORCED);
1311
1252 if (fattr->valid & NFS_ATTR_FATTR_GROUP) { 1312 if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
1253 if (inode->i_gid != fattr->gid) { 1313 if (inode->i_gid != fattr->gid) {
1254 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1314 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1255 inode->i_gid = fattr->gid; 1315 inode->i_gid = fattr->gid;
1256 } 1316 }
1257 } 1317 } else if (server->caps & NFS_CAP_OWNER_GROUP)
1318 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1319 | NFS_INO_INVALID_ACCESS
1320 | NFS_INO_INVALID_ACL
1321 | NFS_INO_REVAL_FORCED);
1258 1322
1259 if (fattr->valid & NFS_ATTR_FATTR_NLINK) { 1323 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
1260 if (inode->i_nlink != fattr->nlink) { 1324 if (inode->i_nlink != fattr->nlink) {
@@ -1263,7 +1327,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1263 invalid |= NFS_INO_INVALID_DATA; 1327 invalid |= NFS_INO_INVALID_DATA;
1264 inode->i_nlink = fattr->nlink; 1328 inode->i_nlink = fattr->nlink;
1265 } 1329 }
1266 } 1330 } else if (server->caps & NFS_CAP_NLINK)
1331 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1332 | NFS_INO_REVAL_FORCED);
1267 1333
1268 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 1334 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
1269 /* 1335 /*
@@ -1293,9 +1359,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1293 || S_ISLNK(inode->i_mode))) 1359 || S_ISLNK(inode->i_mode)))
1294 invalid &= ~NFS_INO_INVALID_DATA; 1360 invalid &= ~NFS_INO_INVALID_DATA;
1295 if (!nfs_have_delegation(inode, FMODE_READ) || 1361 if (!nfs_have_delegation(inode, FMODE_READ) ||
1296 (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) 1362 (save_cache_validity & NFS_INO_REVAL_FORCED))
1297 nfsi->cache_validity |= invalid; 1363 nfsi->cache_validity |= invalid;
1298 nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
1299 1364
1300 return 0; 1365 return 0;
1301 out_changed: 1366 out_changed:
@@ -1442,6 +1507,10 @@ static int __init init_nfs_fs(void)
1442{ 1507{
1443 int err; 1508 int err;
1444 1509
1510 err = nfs_dns_resolver_init();
1511 if (err < 0)
1512 goto out8;
1513
1445 err = nfs_fscache_register(); 1514 err = nfs_fscache_register();
1446 if (err < 0) 1515 if (err < 0)
1447 goto out7; 1516 goto out7;
@@ -1500,6 +1569,8 @@ out5:
1500out6: 1569out6:
1501 nfs_fscache_unregister(); 1570 nfs_fscache_unregister();
1502out7: 1571out7:
1572 nfs_dns_resolver_destroy();
1573out8:
1503 return err; 1574 return err;
1504} 1575}
1505 1576
@@ -1511,6 +1582,7 @@ static void __exit exit_nfs_fs(void)
1511 nfs_destroy_inodecache(); 1582 nfs_destroy_inodecache();
1512 nfs_destroy_nfspagecache(); 1583 nfs_destroy_nfspagecache();
1513 nfs_fscache_unregister(); 1584 nfs_fscache_unregister();
1585 nfs_dns_resolver_destroy();
1514#ifdef CONFIG_PROC_FS 1586#ifdef CONFIG_PROC_FS
1515 rpc_proc_unregister("nfs"); 1587 rpc_proc_unregister("nfs");
1516#endif 1588#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7dd90a6769d0..e21b1bb9972f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -49,6 +49,11 @@ struct nfs_clone_mount {
49#define NFS_MAX_SECFLAVORS (12) 49#define NFS_MAX_SECFLAVORS (12)
50 50
51/* 51/*
52 * Value used if the user did not specify a port value.
53 */
54#define NFS_UNSPEC_PORT (-1)
55
56/*
52 * In-kernel mount arguments 57 * In-kernel mount arguments
53 */ 58 */
54struct nfs_parsed_mount_data { 59struct nfs_parsed_mount_data {
@@ -63,6 +68,7 @@ struct nfs_parsed_mount_data {
63 unsigned int auth_flavor_len; 68 unsigned int auth_flavor_len;
64 rpc_authflavor_t auth_flavors[1]; 69 rpc_authflavor_t auth_flavors[1];
65 char *client_address; 70 char *client_address;
71 unsigned int version;
66 unsigned int minorversion; 72 unsigned int minorversion;
67 char *fscache_uniq; 73 char *fscache_uniq;
68 74
@@ -71,7 +77,7 @@ struct nfs_parsed_mount_data {
71 size_t addrlen; 77 size_t addrlen;
72 char *hostname; 78 char *hostname;
73 u32 version; 79 u32 version;
74 unsigned short port; 80 int port;
75 unsigned short protocol; 81 unsigned short protocol;
76 } mount_server; 82 } mount_server;
77 83
@@ -80,7 +86,7 @@ struct nfs_parsed_mount_data {
80 size_t addrlen; 86 size_t addrlen;
81 char *hostname; 87 char *hostname;
82 char *export_path; 88 char *export_path;
83 unsigned short port; 89 int port;
84 unsigned short protocol; 90 unsigned short protocol;
85 } nfs_server; 91 } nfs_server;
86 92
@@ -102,6 +108,7 @@ struct nfs_mount_request {
102}; 108};
103 109
104extern int nfs_mount(struct nfs_mount_request *info); 110extern int nfs_mount(struct nfs_mount_request *info);
111extern void nfs_umount(const struct nfs_mount_request *info);
105 112
106/* client.c */ 113/* client.c */
107extern struct rpc_program nfs_program; 114extern struct rpc_program nfs_program;
@@ -213,7 +220,6 @@ void nfs_zap_acl_cache(struct inode *inode);
213extern int nfs_wait_bit_killable(void *word); 220extern int nfs_wait_bit_killable(void *word);
214 221
215/* super.c */ 222/* super.c */
216void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
217extern struct file_system_type nfs_xdev_fs_type; 223extern struct file_system_type nfs_xdev_fs_type;
218#ifdef CONFIG_NFS_V4 224#ifdef CONFIG_NFS_V4
219extern struct file_system_type nfs4_xdev_fs_type; 225extern struct file_system_type nfs4_xdev_fs_type;
@@ -248,6 +254,12 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
248 254
249/* write.c */ 255/* write.c */
250extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 256extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
257#ifdef CONFIG_MIGRATION
258extern int nfs_migrate_page(struct address_space *,
259 struct page *, struct page *);
260#else
261#define nfs_migrate_page NULL
262#endif
251 263
252/* nfs4proc.c */ 264/* nfs4proc.c */
253extern int _nfs4_call_sync(struct nfs_server *server, 265extern int _nfs4_call_sync(struct nfs_server *server,
@@ -368,24 +380,3 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
368 return ((unsigned long)len + (unsigned long)base + 380 return ((unsigned long)len + (unsigned long)base +
369 PAGE_SIZE - 1) >> PAGE_SHIFT; 381 PAGE_SIZE - 1) >> PAGE_SHIFT;
370} 382}
371
372#define IPV6_SCOPE_DELIMITER '%'
373
374/*
375 * Set the port number in an address. Be agnostic about the address
376 * family.
377 */
378static inline void nfs_set_port(struct sockaddr *sap, unsigned short port)
379{
380 struct sockaddr_in *ap = (struct sockaddr_in *)sap;
381 struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap;
382
383 switch (sap->sa_family) {
384 case AF_INET:
385 ap->sin_port = htons(port);
386 break;
387 case AF_INET6:
388 ap6->sin6_port = htons(port);
389 break;
390 }
391}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 38ef9eaec407..0adefc40cc89 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -209,6 +209,71 @@ out_mnt_err:
209 goto out; 209 goto out;
210} 210}
211 211
212/**
213 * nfs_umount - Notify a server that we have unmounted this export
214 * @info: pointer to umount request arguments
215 *
216 * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always
217 * use UDP.
218 */
219void nfs_umount(const struct nfs_mount_request *info)
220{
221 static const struct rpc_timeout nfs_umnt_timeout = {
222 .to_initval = 1 * HZ,
223 .to_maxval = 3 * HZ,
224 .to_retries = 2,
225 };
226 struct rpc_create_args args = {
227 .protocol = IPPROTO_UDP,
228 .address = info->sap,
229 .addrsize = info->salen,
230 .timeout = &nfs_umnt_timeout,
231 .servername = info->hostname,
232 .program = &mnt_program,
233 .version = info->version,
234 .authflavor = RPC_AUTH_UNIX,
235 .flags = RPC_CLNT_CREATE_NOPING,
236 };
237 struct mountres result;
238 struct rpc_message msg = {
239 .rpc_argp = info->dirpath,
240 .rpc_resp = &result,
241 };
242 struct rpc_clnt *clnt;
243 int status;
244
245 if (info->noresvport)
246 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
247
248 clnt = rpc_create(&args);
249 if (unlikely(IS_ERR(clnt)))
250 goto out_clnt_err;
251
252 dprintk("NFS: sending UMNT request for %s:%s\n",
253 (info->hostname ? info->hostname : "server"), info->dirpath);
254
255 if (info->version == NFS_MNT3_VERSION)
256 msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT];
257 else
258 msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT];
259
260 status = rpc_call_sync(clnt, &msg, 0);
261 rpc_shutdown_client(clnt);
262
263 if (unlikely(status < 0))
264 goto out_call_err;
265
266 return;
267
268out_clnt_err:
269 dprintk("NFS: failed to create UMNT RPC client, status=%ld\n",
270 PTR_ERR(clnt));
271 return;
272
273out_call_err:
274 dprintk("NFS: UMNT request failed, status=%d\n", status);
275}
276
212/* 277/*
213 * XDR encode/decode functions for MOUNT 278 * XDR encode/decode functions for MOUNT
214 */ 279 */
@@ -258,7 +323,7 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res)
258 return -EIO; 323 return -EIO;
259 status = ntohl(*p); 324 status = ntohl(*p);
260 325
261 for (i = 0; i <= ARRAY_SIZE(mnt_errtbl); i++) { 326 for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
262 if (mnt_errtbl[i].status == status) { 327 if (mnt_errtbl[i].status == status) {
263 res->errno = mnt_errtbl[i].errno; 328 res->errno = mnt_errtbl[i].errno;
264 return 0; 329 return 0;
@@ -309,7 +374,7 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
309 return -EIO; 374 return -EIO;
310 status = ntohl(*p); 375 status = ntohl(*p);
311 376
312 for (i = 0; i <= ARRAY_SIZE(mnt3_errtbl); i++) { 377 for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
313 if (mnt3_errtbl[i].status == status) { 378 if (mnt3_errtbl[i].status == status) {
314 res->errno = mnt3_errtbl[i].errno; 379 res->errno = mnt3_errtbl[i].errno;
315 return 0; 380 return 0;
@@ -407,6 +472,13 @@ static struct rpc_procinfo mnt_procedures[] = {
407 .p_statidx = MOUNTPROC_MNT, 472 .p_statidx = MOUNTPROC_MNT,
408 .p_name = "MOUNT", 473 .p_name = "MOUNT",
409 }, 474 },
475 [MOUNTPROC_UMNT] = {
476 .p_proc = MOUNTPROC_UMNT,
477 .p_encode = (kxdrproc_t)mnt_enc_dirpath,
478 .p_arglen = MNT_enc_dirpath_sz,
479 .p_statidx = MOUNTPROC_UMNT,
480 .p_name = "UMOUNT",
481 },
410}; 482};
411 483
412static struct rpc_procinfo mnt3_procedures[] = { 484static struct rpc_procinfo mnt3_procedures[] = {
@@ -419,6 +491,13 @@ static struct rpc_procinfo mnt3_procedures[] = {
419 .p_statidx = MOUNTPROC3_MNT, 491 .p_statidx = MOUNTPROC3_MNT,
420 .p_name = "MOUNT", 492 .p_name = "MOUNT",
421 }, 493 },
494 [MOUNTPROC3_UMNT] = {
495 .p_proc = MOUNTPROC3_UMNT,
496 .p_encode = (kxdrproc_t)mnt_enc_dirpath,
497 .p_arglen = MNT_enc_dirpath_sz,
498 .p_statidx = MOUNTPROC3_UMNT,
499 .p_name = "UMOUNT",
500 },
422}; 501};
423 502
424 503
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d0cc5ce0edfe..ee6a13f05443 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -299,7 +299,6 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
299 299
300/* 300/*
301 * Create a regular file. 301 * Create a regular file.
302 * For now, we don't implement O_EXCL.
303 */ 302 */
304static int 303static int
305nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 304nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 2a2a0a7143ad..2636c26d56fa 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -17,6 +17,7 @@
17#include <linux/inet.h> 17#include <linux/inet.h>
18#include "internal.h" 18#include "internal.h"
19#include "nfs4_fs.h" 19#include "nfs4_fs.h"
20#include "dns_resolve.h"
20 21
21#define NFSDBG_FACILITY NFSDBG_VFS 22#define NFSDBG_FACILITY NFSDBG_VFS
22 23
@@ -95,6 +96,20 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
95 return 0; 96 return 0;
96} 97}
97 98
99static size_t nfs_parse_server_name(char *string, size_t len,
100 struct sockaddr *sa, size_t salen)
101{
102 ssize_t ret;
103
104 ret = rpc_pton(string, len, sa, salen);
105 if (ret == 0) {
106 ret = nfs_dns_resolve_name(string, len, sa, salen);
107 if (ret < 0)
108 ret = 0;
109 }
110 return ret;
111}
112
98static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, 113static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
99 char *page, char *page2, 114 char *page, char *page2,
100 const struct nfs4_fs_location *location) 115 const struct nfs4_fs_location *location)
@@ -121,11 +136,12 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
121 136
122 if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) 137 if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len))
123 continue; 138 continue;
124 nfs_parse_ip_address(buf->data, buf->len, 139 mountdata->addrlen = nfs_parse_server_name(buf->data,
125 mountdata->addr, &mountdata->addrlen); 140 buf->len,
126 if (mountdata->addr->sa_family == AF_UNSPEC) 141 mountdata->addr, mountdata->addrlen);
142 if (mountdata->addrlen == 0)
127 continue; 143 continue;
128 nfs_set_port(mountdata->addr, NFS_PORT); 144 rpc_set_port(mountdata->addr, NFS_PORT);
129 145
130 memcpy(page2, buf->data, buf->len); 146 memcpy(page2, buf->data, buf->len);
131 page2[buf->len] = '\0'; 147 page2[buf->len] = '\0';
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6917311f201c..be6544aef41f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -61,6 +61,8 @@
61#define NFS4_POLL_RETRY_MIN (HZ/10) 61#define NFS4_POLL_RETRY_MIN (HZ/10)
62#define NFS4_POLL_RETRY_MAX (15*HZ) 62#define NFS4_POLL_RETRY_MAX (15*HZ)
63 63
64#define NFS4_MAX_LOOP_ON_RECOVER (10)
65
64struct nfs4_opendata; 66struct nfs4_opendata;
65static int _nfs4_proc_open(struct nfs4_opendata *data); 67static int _nfs4_proc_open(struct nfs4_opendata *data);
66static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 68static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
@@ -426,17 +428,19 @@ out:
426static int nfs4_recover_session(struct nfs4_session *session) 428static int nfs4_recover_session(struct nfs4_session *session)
427{ 429{
428 struct nfs_client *clp = session->clp; 430 struct nfs_client *clp = session->clp;
431 unsigned int loop;
429 int ret; 432 int ret;
430 433
431 for (;;) { 434 for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
432 ret = nfs4_wait_clnt_recover(clp); 435 ret = nfs4_wait_clnt_recover(clp);
433 if (ret != 0) 436 if (ret != 0)
434 return ret; 437 break;
435 if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) 438 if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
436 break; 439 break;
437 nfs4_schedule_state_manager(clp); 440 nfs4_schedule_state_manager(clp);
441 ret = -EIO;
438 } 442 }
439 return 0; 443 return ret;
440} 444}
441 445
442static int nfs41_setup_sequence(struct nfs4_session *session, 446static int nfs41_setup_sequence(struct nfs4_session *session,
@@ -1444,18 +1448,20 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1444static int nfs4_recover_expired_lease(struct nfs_server *server) 1448static int nfs4_recover_expired_lease(struct nfs_server *server)
1445{ 1449{
1446 struct nfs_client *clp = server->nfs_client; 1450 struct nfs_client *clp = server->nfs_client;
1451 unsigned int loop;
1447 int ret; 1452 int ret;
1448 1453
1449 for (;;) { 1454 for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
1450 ret = nfs4_wait_clnt_recover(clp); 1455 ret = nfs4_wait_clnt_recover(clp);
1451 if (ret != 0) 1456 if (ret != 0)
1452 return ret; 1457 break;
1453 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1458 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1454 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1459 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1455 break; 1460 break;
1456 nfs4_schedule_state_recovery(clp); 1461 nfs4_schedule_state_recovery(clp);
1462 ret = -EIO;
1457 } 1463 }
1458 return 0; 1464 return ret;
1459} 1465}
1460 1466
1461/* 1467/*
@@ -1997,12 +2003,34 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
1997 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2003 status = nfs4_call_sync(server, &msg, &args, &res, 0);
1998 if (status == 0) { 2004 if (status == 0) {
1999 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); 2005 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
2006 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
2007 NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
2008 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
2009 NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
2010 NFS_CAP_CTIME|NFS_CAP_MTIME);
2000 if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) 2011 if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
2001 server->caps |= NFS_CAP_ACLS; 2012 server->caps |= NFS_CAP_ACLS;
2002 if (res.has_links != 0) 2013 if (res.has_links != 0)
2003 server->caps |= NFS_CAP_HARDLINKS; 2014 server->caps |= NFS_CAP_HARDLINKS;
2004 if (res.has_symlinks != 0) 2015 if (res.has_symlinks != 0)
2005 server->caps |= NFS_CAP_SYMLINKS; 2016 server->caps |= NFS_CAP_SYMLINKS;
2017 if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID)
2018 server->caps |= NFS_CAP_FILEID;
2019 if (res.attr_bitmask[1] & FATTR4_WORD1_MODE)
2020 server->caps |= NFS_CAP_MODE;
2021 if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)
2022 server->caps |= NFS_CAP_NLINK;
2023 if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER)
2024 server->caps |= NFS_CAP_OWNER;
2025 if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)
2026 server->caps |= NFS_CAP_OWNER_GROUP;
2027 if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)
2028 server->caps |= NFS_CAP_ATIME;
2029 if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)
2030 server->caps |= NFS_CAP_CTIME;
2031 if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
2032 server->caps |= NFS_CAP_MTIME;
2033
2006 memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); 2034 memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
2007 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; 2035 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
2008 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 2036 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 617273e7d47f..cfc30d362f94 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -702,29 +702,12 @@ struct compound_hdr {
702 u32 minorversion; 702 u32 minorversion;
703}; 703};
704 704
705/* 705static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
706 * START OF "GENERIC" ENCODE ROUTINES. 706{
707 * These may look a little ugly since they are imported from a "generic" 707 __be32 *p = xdr_reserve_space(xdr, nbytes);
708 * set of XDR encode/decode routines which are intended to be shared by 708 BUG_ON(!p);
709 * all of our NFSv4 implementations (OpenBSD, MacOS X...). 709 return p;
710 * 710}
711 * If the pain of reading these is too great, it should be a straightforward
712 * task to translate them into Linux-specific versions which are more
713 * consistent with the style used in NFSv2/v3...
714 */
715#define WRITE32(n) *p++ = htonl(n)
716#define WRITE64(n) do { \
717 *p++ = htonl((uint32_t)((n) >> 32)); \
718 *p++ = htonl((uint32_t)(n)); \
719} while (0)
720#define WRITEMEM(ptr,nbytes) do { \
721 p = xdr_encode_opaque_fixed(p, ptr, nbytes); \
722} while (0)
723
724#define RESERVE_SPACE(nbytes) do { \
725 p = xdr_reserve_space(xdr, nbytes); \
726 BUG_ON(!p); \
727} while (0)
728 711
729static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) 712static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
730{ 713{
@@ -749,12 +732,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
749 732
750 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); 733 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
751 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); 734 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
752 RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2)); 735 p = reserve_space(xdr, 4 + hdr->taglen + 8);
753 WRITE32(hdr->taglen); 736 p = xdr_encode_opaque(p, hdr->tag, hdr->taglen);
754 WRITEMEM(hdr->tag, hdr->taglen); 737 *p++ = cpu_to_be32(hdr->minorversion);
755 WRITE32(hdr->minorversion);
756 hdr->nops_p = p; 738 hdr->nops_p = p;
757 WRITE32(hdr->nops); 739 *p = cpu_to_be32(hdr->nops);
758} 740}
759 741
760static void encode_nops(struct compound_hdr *hdr) 742static void encode_nops(struct compound_hdr *hdr)
@@ -829,55 +811,53 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
829 len += 16; 811 len += 16;
830 else if (iap->ia_valid & ATTR_MTIME) 812 else if (iap->ia_valid & ATTR_MTIME)
831 len += 4; 813 len += 4;
832 RESERVE_SPACE(len); 814 p = reserve_space(xdr, len);
833 815
834 /* 816 /*
835 * We write the bitmap length now, but leave the bitmap and the attribute 817 * We write the bitmap length now, but leave the bitmap and the attribute
836 * buffer length to be backfilled at the end of this routine. 818 * buffer length to be backfilled at the end of this routine.
837 */ 819 */
838 WRITE32(2); 820 *p++ = cpu_to_be32(2);
839 q = p; 821 q = p;
840 p += 3; 822 p += 3;
841 823
842 if (iap->ia_valid & ATTR_SIZE) { 824 if (iap->ia_valid & ATTR_SIZE) {
843 bmval0 |= FATTR4_WORD0_SIZE; 825 bmval0 |= FATTR4_WORD0_SIZE;
844 WRITE64(iap->ia_size); 826 p = xdr_encode_hyper(p, iap->ia_size);
845 } 827 }
846 if (iap->ia_valid & ATTR_MODE) { 828 if (iap->ia_valid & ATTR_MODE) {
847 bmval1 |= FATTR4_WORD1_MODE; 829 bmval1 |= FATTR4_WORD1_MODE;
848 WRITE32(iap->ia_mode & S_IALLUGO); 830 *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO);
849 } 831 }
850 if (iap->ia_valid & ATTR_UID) { 832 if (iap->ia_valid & ATTR_UID) {
851 bmval1 |= FATTR4_WORD1_OWNER; 833 bmval1 |= FATTR4_WORD1_OWNER;
852 WRITE32(owner_namelen); 834 p = xdr_encode_opaque(p, owner_name, owner_namelen);
853 WRITEMEM(owner_name, owner_namelen);
854 } 835 }
855 if (iap->ia_valid & ATTR_GID) { 836 if (iap->ia_valid & ATTR_GID) {
856 bmval1 |= FATTR4_WORD1_OWNER_GROUP; 837 bmval1 |= FATTR4_WORD1_OWNER_GROUP;
857 WRITE32(owner_grouplen); 838 p = xdr_encode_opaque(p, owner_group, owner_grouplen);
858 WRITEMEM(owner_group, owner_grouplen);
859 } 839 }
860 if (iap->ia_valid & ATTR_ATIME_SET) { 840 if (iap->ia_valid & ATTR_ATIME_SET) {
861 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 841 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
862 WRITE32(NFS4_SET_TO_CLIENT_TIME); 842 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
863 WRITE32(0); 843 *p++ = cpu_to_be32(0);
864 WRITE32(iap->ia_mtime.tv_sec); 844 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
865 WRITE32(iap->ia_mtime.tv_nsec); 845 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
866 } 846 }
867 else if (iap->ia_valid & ATTR_ATIME) { 847 else if (iap->ia_valid & ATTR_ATIME) {
868 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 848 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
869 WRITE32(NFS4_SET_TO_SERVER_TIME); 849 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
870 } 850 }
871 if (iap->ia_valid & ATTR_MTIME_SET) { 851 if (iap->ia_valid & ATTR_MTIME_SET) {
872 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 852 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
873 WRITE32(NFS4_SET_TO_CLIENT_TIME); 853 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
874 WRITE32(0); 854 *p++ = cpu_to_be32(0);
875 WRITE32(iap->ia_mtime.tv_sec); 855 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
876 WRITE32(iap->ia_mtime.tv_nsec); 856 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
877 } 857 }
878 else if (iap->ia_valid & ATTR_MTIME) { 858 else if (iap->ia_valid & ATTR_MTIME) {
879 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 859 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
880 WRITE32(NFS4_SET_TO_SERVER_TIME); 860 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
881 } 861 }
882 862
883 /* 863 /*
@@ -891,7 +871,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
891 len = (char *)p - (char *)q - 12; 871 len = (char *)p - (char *)q - 12;
892 *q++ = htonl(bmval0); 872 *q++ = htonl(bmval0);
893 *q++ = htonl(bmval1); 873 *q++ = htonl(bmval1);
894 *q++ = htonl(len); 874 *q = htonl(len);
895 875
896/* out: */ 876/* out: */
897} 877}
@@ -900,9 +880,9 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd
900{ 880{
901 __be32 *p; 881 __be32 *p;
902 882
903 RESERVE_SPACE(8); 883 p = reserve_space(xdr, 8);
904 WRITE32(OP_ACCESS); 884 *p++ = cpu_to_be32(OP_ACCESS);
905 WRITE32(access); 885 *p = cpu_to_be32(access);
906 hdr->nops++; 886 hdr->nops++;
907 hdr->replen += decode_access_maxsz; 887 hdr->replen += decode_access_maxsz;
908} 888}
@@ -911,10 +891,10 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
911{ 891{
912 __be32 *p; 892 __be32 *p;
913 893
914 RESERVE_SPACE(8+NFS4_STATEID_SIZE); 894 p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
915 WRITE32(OP_CLOSE); 895 *p++ = cpu_to_be32(OP_CLOSE);
916 WRITE32(arg->seqid->sequence->counter); 896 *p++ = cpu_to_be32(arg->seqid->sequence->counter);
917 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 897 xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
918 hdr->nops++; 898 hdr->nops++;
919 hdr->replen += decode_close_maxsz; 899 hdr->replen += decode_close_maxsz;
920} 900}
@@ -923,10 +903,10 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar
923{ 903{
924 __be32 *p; 904 __be32 *p;
925 905
926 RESERVE_SPACE(16); 906 p = reserve_space(xdr, 16);
927 WRITE32(OP_COMMIT); 907 *p++ = cpu_to_be32(OP_COMMIT);
928 WRITE64(args->offset); 908 p = xdr_encode_hyper(p, args->offset);
929 WRITE32(args->count); 909 *p = cpu_to_be32(args->count);
930 hdr->nops++; 910 hdr->nops++;
931 hdr->replen += decode_commit_maxsz; 911 hdr->replen += decode_commit_maxsz;
932} 912}
@@ -935,30 +915,28 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
935{ 915{
936 __be32 *p; 916 __be32 *p;
937 917
938 RESERVE_SPACE(8); 918 p = reserve_space(xdr, 8);
939 WRITE32(OP_CREATE); 919 *p++ = cpu_to_be32(OP_CREATE);
940 WRITE32(create->ftype); 920 *p = cpu_to_be32(create->ftype);
941 921
942 switch (create->ftype) { 922 switch (create->ftype) {
943 case NF4LNK: 923 case NF4LNK:
944 RESERVE_SPACE(4); 924 p = reserve_space(xdr, 4);
945 WRITE32(create->u.symlink.len); 925 *p = cpu_to_be32(create->u.symlink.len);
946 xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len); 926 xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
947 break; 927 break;
948 928
949 case NF4BLK: case NF4CHR: 929 case NF4BLK: case NF4CHR:
950 RESERVE_SPACE(8); 930 p = reserve_space(xdr, 8);
951 WRITE32(create->u.device.specdata1); 931 *p++ = cpu_to_be32(create->u.device.specdata1);
952 WRITE32(create->u.device.specdata2); 932 *p = cpu_to_be32(create->u.device.specdata2);
953 break; 933 break;
954 934
955 default: 935 default:
956 break; 936 break;
957 } 937 }
958 938
959 RESERVE_SPACE(4 + create->name->len); 939 encode_string(xdr, create->name->len, create->name->name);
960 WRITE32(create->name->len);
961 WRITEMEM(create->name->name, create->name->len);
962 hdr->nops++; 940 hdr->nops++;
963 hdr->replen += decode_create_maxsz; 941 hdr->replen += decode_create_maxsz;
964 942
@@ -969,10 +947,10 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
969{ 947{
970 __be32 *p; 948 __be32 *p;
971 949
972 RESERVE_SPACE(12); 950 p = reserve_space(xdr, 12);
973 WRITE32(OP_GETATTR); 951 *p++ = cpu_to_be32(OP_GETATTR);
974 WRITE32(1); 952 *p++ = cpu_to_be32(1);
975 WRITE32(bitmap); 953 *p = cpu_to_be32(bitmap);
976 hdr->nops++; 954 hdr->nops++;
977 hdr->replen += decode_getattr_maxsz; 955 hdr->replen += decode_getattr_maxsz;
978} 956}
@@ -981,11 +959,11 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm
981{ 959{
982 __be32 *p; 960 __be32 *p;
983 961
984 RESERVE_SPACE(16); 962 p = reserve_space(xdr, 16);
985 WRITE32(OP_GETATTR); 963 *p++ = cpu_to_be32(OP_GETATTR);
986 WRITE32(2); 964 *p++ = cpu_to_be32(2);
987 WRITE32(bm0); 965 *p++ = cpu_to_be32(bm0);
988 WRITE32(bm1); 966 *p = cpu_to_be32(bm1);
989 hdr->nops++; 967 hdr->nops++;
990 hdr->replen += decode_getattr_maxsz; 968 hdr->replen += decode_getattr_maxsz;
991} 969}
@@ -1012,8 +990,8 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1012{ 990{
1013 __be32 *p; 991 __be32 *p;
1014 992
1015 RESERVE_SPACE(4); 993 p = reserve_space(xdr, 4);
1016 WRITE32(OP_GETFH); 994 *p = cpu_to_be32(OP_GETFH);
1017 hdr->nops++; 995 hdr->nops++;
1018 hdr->replen += decode_getfh_maxsz; 996 hdr->replen += decode_getfh_maxsz;
1019} 997}
@@ -1022,10 +1000,9 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
1022{ 1000{
1023 __be32 *p; 1001 __be32 *p;
1024 1002
1025 RESERVE_SPACE(8 + name->len); 1003 p = reserve_space(xdr, 8 + name->len);
1026 WRITE32(OP_LINK); 1004 *p++ = cpu_to_be32(OP_LINK);
1027 WRITE32(name->len); 1005 xdr_encode_opaque(p, name->name, name->len);
1028 WRITEMEM(name->name, name->len);
1029 hdr->nops++; 1006 hdr->nops++;
1030 hdr->replen += decode_link_maxsz; 1007 hdr->replen += decode_link_maxsz;
1031} 1008}
@@ -1052,27 +1029,27 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
1052{ 1029{
1053 __be32 *p; 1030 __be32 *p;
1054 1031
1055 RESERVE_SPACE(32); 1032 p = reserve_space(xdr, 32);
1056 WRITE32(OP_LOCK); 1033 *p++ = cpu_to_be32(OP_LOCK);
1057 WRITE32(nfs4_lock_type(args->fl, args->block)); 1034 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block));
1058 WRITE32(args->reclaim); 1035 *p++ = cpu_to_be32(args->reclaim);
1059 WRITE64(args->fl->fl_start); 1036 p = xdr_encode_hyper(p, args->fl->fl_start);
1060 WRITE64(nfs4_lock_length(args->fl)); 1037 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1061 WRITE32(args->new_lock_owner); 1038 *p = cpu_to_be32(args->new_lock_owner);
1062 if (args->new_lock_owner){ 1039 if (args->new_lock_owner){
1063 RESERVE_SPACE(4+NFS4_STATEID_SIZE+32); 1040 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
1064 WRITE32(args->open_seqid->sequence->counter); 1041 *p++ = cpu_to_be32(args->open_seqid->sequence->counter);
1065 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); 1042 p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
1066 WRITE32(args->lock_seqid->sequence->counter); 1043 *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
1067 WRITE64(args->lock_owner.clientid); 1044 p = xdr_encode_hyper(p, args->lock_owner.clientid);
1068 WRITE32(16); 1045 *p++ = cpu_to_be32(16);
1069 WRITEMEM("lock id:", 8); 1046 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1070 WRITE64(args->lock_owner.id); 1047 xdr_encode_hyper(p, args->lock_owner.id);
1071 } 1048 }
1072 else { 1049 else {
1073 RESERVE_SPACE(NFS4_STATEID_SIZE+4); 1050 p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
1074 WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE); 1051 p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
1075 WRITE32(args->lock_seqid->sequence->counter); 1052 *p = cpu_to_be32(args->lock_seqid->sequence->counter);
1076 } 1053 }
1077 hdr->nops++; 1054 hdr->nops++;
1078 hdr->replen += decode_lock_maxsz; 1055 hdr->replen += decode_lock_maxsz;
@@ -1082,15 +1059,15 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar
1082{ 1059{
1083 __be32 *p; 1060 __be32 *p;
1084 1061
1085 RESERVE_SPACE(52); 1062 p = reserve_space(xdr, 52);
1086 WRITE32(OP_LOCKT); 1063 *p++ = cpu_to_be32(OP_LOCKT);
1087 WRITE32(nfs4_lock_type(args->fl, 0)); 1064 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1088 WRITE64(args->fl->fl_start); 1065 p = xdr_encode_hyper(p, args->fl->fl_start);
1089 WRITE64(nfs4_lock_length(args->fl)); 1066 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1090 WRITE64(args->lock_owner.clientid); 1067 p = xdr_encode_hyper(p, args->lock_owner.clientid);
1091 WRITE32(16); 1068 *p++ = cpu_to_be32(16);
1092 WRITEMEM("lock id:", 8); 1069 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1093 WRITE64(args->lock_owner.id); 1070 xdr_encode_hyper(p, args->lock_owner.id);
1094 hdr->nops++; 1071 hdr->nops++;
1095 hdr->replen += decode_lockt_maxsz; 1072 hdr->replen += decode_lockt_maxsz;
1096} 1073}
@@ -1099,13 +1076,13 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
1099{ 1076{
1100 __be32 *p; 1077 __be32 *p;
1101 1078
1102 RESERVE_SPACE(12+NFS4_STATEID_SIZE+16); 1079 p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16);
1103 WRITE32(OP_LOCKU); 1080 *p++ = cpu_to_be32(OP_LOCKU);
1104 WRITE32(nfs4_lock_type(args->fl, 0)); 1081 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1105 WRITE32(args->seqid->sequence->counter); 1082 *p++ = cpu_to_be32(args->seqid->sequence->counter);
1106 WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE); 1083 p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
1107 WRITE64(args->fl->fl_start); 1084 p = xdr_encode_hyper(p, args->fl->fl_start);
1108 WRITE64(nfs4_lock_length(args->fl)); 1085 xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1109 hdr->nops++; 1086 hdr->nops++;
1110 hdr->replen += decode_locku_maxsz; 1087 hdr->replen += decode_locku_maxsz;
1111} 1088}
@@ -1115,10 +1092,9 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
1115 int len = name->len; 1092 int len = name->len;
1116 __be32 *p; 1093 __be32 *p;
1117 1094
1118 RESERVE_SPACE(8 + len); 1095 p = reserve_space(xdr, 8 + len);
1119 WRITE32(OP_LOOKUP); 1096 *p++ = cpu_to_be32(OP_LOOKUP);
1120 WRITE32(len); 1097 xdr_encode_opaque(p, name->name, len);
1121 WRITEMEM(name->name, len);
1122 hdr->nops++; 1098 hdr->nops++;
1123 hdr->replen += decode_lookup_maxsz; 1099 hdr->replen += decode_lookup_maxsz;
1124} 1100}
@@ -1127,21 +1103,21 @@ static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
1127{ 1103{
1128 __be32 *p; 1104 __be32 *p;
1129 1105
1130 RESERVE_SPACE(8); 1106 p = reserve_space(xdr, 8);
1131 switch (fmode & (FMODE_READ|FMODE_WRITE)) { 1107 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
1132 case FMODE_READ: 1108 case FMODE_READ:
1133 WRITE32(NFS4_SHARE_ACCESS_READ); 1109 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_READ);
1134 break; 1110 break;
1135 case FMODE_WRITE: 1111 case FMODE_WRITE:
1136 WRITE32(NFS4_SHARE_ACCESS_WRITE); 1112 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_WRITE);
1137 break; 1113 break;
1138 case FMODE_READ|FMODE_WRITE: 1114 case FMODE_READ|FMODE_WRITE:
1139 WRITE32(NFS4_SHARE_ACCESS_BOTH); 1115 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_BOTH);
1140 break; 1116 break;
1141 default: 1117 default:
1142 WRITE32(0); 1118 *p++ = cpu_to_be32(0);
1143 } 1119 }
1144 WRITE32(0); /* for linux, share_deny = 0 always */ 1120 *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */
1145} 1121}
1146 1122
1147static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1123static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1151,29 +1127,29 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
1151 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, 1127 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
1152 * owner 4 = 32 1128 * owner 4 = 32
1153 */ 1129 */
1154 RESERVE_SPACE(8); 1130 p = reserve_space(xdr, 8);
1155 WRITE32(OP_OPEN); 1131 *p++ = cpu_to_be32(OP_OPEN);
1156 WRITE32(arg->seqid->sequence->counter); 1132 *p = cpu_to_be32(arg->seqid->sequence->counter);
1157 encode_share_access(xdr, arg->fmode); 1133 encode_share_access(xdr, arg->fmode);
1158 RESERVE_SPACE(28); 1134 p = reserve_space(xdr, 28);
1159 WRITE64(arg->clientid); 1135 p = xdr_encode_hyper(p, arg->clientid);
1160 WRITE32(16); 1136 *p++ = cpu_to_be32(16);
1161 WRITEMEM("open id:", 8); 1137 p = xdr_encode_opaque_fixed(p, "open id:", 8);
1162 WRITE64(arg->id); 1138 xdr_encode_hyper(p, arg->id);
1163} 1139}
1164 1140
1165static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1141static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
1166{ 1142{
1167 __be32 *p; 1143 __be32 *p;
1168 1144
1169 RESERVE_SPACE(4); 1145 p = reserve_space(xdr, 4);
1170 switch(arg->open_flags & O_EXCL) { 1146 switch(arg->open_flags & O_EXCL) {
1171 case 0: 1147 case 0:
1172 WRITE32(NFS4_CREATE_UNCHECKED); 1148 *p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
1173 encode_attrs(xdr, arg->u.attrs, arg->server); 1149 encode_attrs(xdr, arg->u.attrs, arg->server);
1174 break; 1150 break;
1175 default: 1151 default:
1176 WRITE32(NFS4_CREATE_EXCLUSIVE); 1152 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
1177 encode_nfs4_verifier(xdr, &arg->u.verifier); 1153 encode_nfs4_verifier(xdr, &arg->u.verifier);
1178 } 1154 }
1179} 1155}
@@ -1182,14 +1158,14 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
1182{ 1158{
1183 __be32 *p; 1159 __be32 *p;
1184 1160
1185 RESERVE_SPACE(4); 1161 p = reserve_space(xdr, 4);
1186 switch (arg->open_flags & O_CREAT) { 1162 switch (arg->open_flags & O_CREAT) {
1187 case 0: 1163 case 0:
1188 WRITE32(NFS4_OPEN_NOCREATE); 1164 *p = cpu_to_be32(NFS4_OPEN_NOCREATE);
1189 break; 1165 break;
1190 default: 1166 default:
1191 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); 1167 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
1192 WRITE32(NFS4_OPEN_CREATE); 1168 *p = cpu_to_be32(NFS4_OPEN_CREATE);
1193 encode_createmode(xdr, arg); 1169 encode_createmode(xdr, arg);
1194 } 1170 }
1195} 1171}
@@ -1198,16 +1174,16 @@ static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delega
1198{ 1174{
1199 __be32 *p; 1175 __be32 *p;
1200 1176
1201 RESERVE_SPACE(4); 1177 p = reserve_space(xdr, 4);
1202 switch (delegation_type) { 1178 switch (delegation_type) {
1203 case 0: 1179 case 0:
1204 WRITE32(NFS4_OPEN_DELEGATE_NONE); 1180 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE);
1205 break; 1181 break;
1206 case FMODE_READ: 1182 case FMODE_READ:
1207 WRITE32(NFS4_OPEN_DELEGATE_READ); 1183 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ);
1208 break; 1184 break;
1209 case FMODE_WRITE|FMODE_READ: 1185 case FMODE_WRITE|FMODE_READ:
1210 WRITE32(NFS4_OPEN_DELEGATE_WRITE); 1186 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE);
1211 break; 1187 break;
1212 default: 1188 default:
1213 BUG(); 1189 BUG();
@@ -1218,8 +1194,8 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
1218{ 1194{
1219 __be32 *p; 1195 __be32 *p;
1220 1196
1221 RESERVE_SPACE(4); 1197 p = reserve_space(xdr, 4);
1222 WRITE32(NFS4_OPEN_CLAIM_NULL); 1198 *p = cpu_to_be32(NFS4_OPEN_CLAIM_NULL);
1223 encode_string(xdr, name->len, name->name); 1199 encode_string(xdr, name->len, name->name);
1224} 1200}
1225 1201
@@ -1227,8 +1203,8 @@ static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
1227{ 1203{
1228 __be32 *p; 1204 __be32 *p;
1229 1205
1230 RESERVE_SPACE(4); 1206 p = reserve_space(xdr, 4);
1231 WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); 1207 *p = cpu_to_be32(NFS4_OPEN_CLAIM_PREVIOUS);
1232 encode_delegation_type(xdr, type); 1208 encode_delegation_type(xdr, type);
1233} 1209}
1234 1210
@@ -1236,9 +1212,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
1236{ 1212{
1237 __be32 *p; 1213 __be32 *p;
1238 1214
1239 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1215 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1240 WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); 1216 *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
1241 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1217 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
1242 encode_string(xdr, name->len, name->name); 1218 encode_string(xdr, name->len, name->name);
1243} 1219}
1244 1220
@@ -1267,10 +1243,10 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
1267{ 1243{
1268 __be32 *p; 1244 __be32 *p;
1269 1245
1270 RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); 1246 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
1271 WRITE32(OP_OPEN_CONFIRM); 1247 *p++ = cpu_to_be32(OP_OPEN_CONFIRM);
1272 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1248 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1273 WRITE32(arg->seqid->sequence->counter); 1249 *p = cpu_to_be32(arg->seqid->sequence->counter);
1274 hdr->nops++; 1250 hdr->nops++;
1275 hdr->replen += decode_open_confirm_maxsz; 1251 hdr->replen += decode_open_confirm_maxsz;
1276} 1252}
@@ -1279,10 +1255,10 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close
1279{ 1255{
1280 __be32 *p; 1256 __be32 *p;
1281 1257
1282 RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); 1258 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
1283 WRITE32(OP_OPEN_DOWNGRADE); 1259 *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
1284 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1260 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1285 WRITE32(arg->seqid->sequence->counter); 1261 *p = cpu_to_be32(arg->seqid->sequence->counter);
1286 encode_share_access(xdr, arg->fmode); 1262 encode_share_access(xdr, arg->fmode);
1287 hdr->nops++; 1263 hdr->nops++;
1288 hdr->replen += decode_open_downgrade_maxsz; 1264 hdr->replen += decode_open_downgrade_maxsz;
@@ -1294,10 +1270,9 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd
1294 int len = fh->size; 1270 int len = fh->size;
1295 __be32 *p; 1271 __be32 *p;
1296 1272
1297 RESERVE_SPACE(8 + len); 1273 p = reserve_space(xdr, 8 + len);
1298 WRITE32(OP_PUTFH); 1274 *p++ = cpu_to_be32(OP_PUTFH);
1299 WRITE32(len); 1275 xdr_encode_opaque(p, fh->data, len);
1300 WRITEMEM(fh->data, len);
1301 hdr->nops++; 1276 hdr->nops++;
1302 hdr->replen += decode_putfh_maxsz; 1277 hdr->replen += decode_putfh_maxsz;
1303} 1278}
@@ -1306,8 +1281,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1306{ 1281{
1307 __be32 *p; 1282 __be32 *p;
1308 1283
1309 RESERVE_SPACE(4); 1284 p = reserve_space(xdr, 4);
1310 WRITE32(OP_PUTROOTFH); 1285 *p = cpu_to_be32(OP_PUTROOTFH);
1311 hdr->nops++; 1286 hdr->nops++;
1312 hdr->replen += decode_putrootfh_maxsz; 1287 hdr->replen += decode_putrootfh_maxsz;
1313} 1288}
@@ -1317,26 +1292,26 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1317 nfs4_stateid stateid; 1292 nfs4_stateid stateid;
1318 __be32 *p; 1293 __be32 *p;
1319 1294
1320 RESERVE_SPACE(NFS4_STATEID_SIZE); 1295 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1321 if (ctx->state != NULL) { 1296 if (ctx->state != NULL) {
1322 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); 1297 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
1323 WRITEMEM(stateid.data, NFS4_STATEID_SIZE); 1298 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
1324 } else 1299 } else
1325 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); 1300 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
1326} 1301}
1327 1302
1328static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) 1303static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
1329{ 1304{
1330 __be32 *p; 1305 __be32 *p;
1331 1306
1332 RESERVE_SPACE(4); 1307 p = reserve_space(xdr, 4);
1333 WRITE32(OP_READ); 1308 *p = cpu_to_be32(OP_READ);
1334 1309
1335 encode_stateid(xdr, args->context); 1310 encode_stateid(xdr, args->context);
1336 1311
1337 RESERVE_SPACE(12); 1312 p = reserve_space(xdr, 12);
1338 WRITE64(args->offset); 1313 p = xdr_encode_hyper(p, args->offset);
1339 WRITE32(args->count); 1314 *p = cpu_to_be32(args->count);
1340 hdr->nops++; 1315 hdr->nops++;
1341 hdr->replen += decode_read_maxsz; 1316 hdr->replen += decode_read_maxsz;
1342} 1317}
@@ -1349,20 +1324,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1349 }; 1324 };
1350 __be32 *p; 1325 __be32 *p;
1351 1326
1352 RESERVE_SPACE(12+NFS4_VERIFIER_SIZE+20); 1327 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1353 WRITE32(OP_READDIR); 1328 *p++ = cpu_to_be32(OP_READDIR);
1354 WRITE64(readdir->cookie); 1329 p = xdr_encode_hyper(p, readdir->cookie);
1355 WRITEMEM(readdir->verifier.data, NFS4_VERIFIER_SIZE); 1330 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
1356 WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ 1331 *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */
1357 WRITE32(readdir->count); 1332 *p++ = cpu_to_be32(readdir->count);
1358 WRITE32(2); 1333 *p++ = cpu_to_be32(2);
1359 /* Switch to mounted_on_fileid if the server supports it */ 1334 /* Switch to mounted_on_fileid if the server supports it */
1360 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) 1335 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1361 attrs[0] &= ~FATTR4_WORD0_FILEID; 1336 attrs[0] &= ~FATTR4_WORD0_FILEID;
1362 else 1337 else
1363 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 1338 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1364 WRITE32(attrs[0] & readdir->bitmask[0]); 1339 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
1365 WRITE32(attrs[1] & readdir->bitmask[1]); 1340 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
1366 hdr->nops++; 1341 hdr->nops++;
1367 hdr->replen += decode_readdir_maxsz; 1342 hdr->replen += decode_readdir_maxsz;
1368 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", 1343 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
@@ -1378,8 +1353,8 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *
1378{ 1353{
1379 __be32 *p; 1354 __be32 *p;
1380 1355
1381 RESERVE_SPACE(4); 1356 p = reserve_space(xdr, 4);
1382 WRITE32(OP_READLINK); 1357 *p = cpu_to_be32(OP_READLINK);
1383 hdr->nops++; 1358 hdr->nops++;
1384 hdr->replen += decode_readlink_maxsz; 1359 hdr->replen += decode_readlink_maxsz;
1385} 1360}
@@ -1388,10 +1363,9 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc
1388{ 1363{
1389 __be32 *p; 1364 __be32 *p;
1390 1365
1391 RESERVE_SPACE(8 + name->len); 1366 p = reserve_space(xdr, 8 + name->len);
1392 WRITE32(OP_REMOVE); 1367 *p++ = cpu_to_be32(OP_REMOVE);
1393 WRITE32(name->len); 1368 xdr_encode_opaque(p, name->name, name->len);
1394 WRITEMEM(name->name, name->len);
1395 hdr->nops++; 1369 hdr->nops++;
1396 hdr->replen += decode_remove_maxsz; 1370 hdr->replen += decode_remove_maxsz;
1397} 1371}
@@ -1400,14 +1374,10 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co
1400{ 1374{
1401 __be32 *p; 1375 __be32 *p;
1402 1376
1403 RESERVE_SPACE(8 + oldname->len); 1377 p = reserve_space(xdr, 4);
1404 WRITE32(OP_RENAME); 1378 *p = cpu_to_be32(OP_RENAME);
1405 WRITE32(oldname->len); 1379 encode_string(xdr, oldname->len, oldname->name);
1406 WRITEMEM(oldname->name, oldname->len); 1380 encode_string(xdr, newname->len, newname->name);
1407
1408 RESERVE_SPACE(4 + newname->len);
1409 WRITE32(newname->len);
1410 WRITEMEM(newname->name, newname->len);
1411 hdr->nops++; 1381 hdr->nops++;
1412 hdr->replen += decode_rename_maxsz; 1382 hdr->replen += decode_rename_maxsz;
1413} 1383}
@@ -1416,9 +1386,9 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client
1416{ 1386{
1417 __be32 *p; 1387 __be32 *p;
1418 1388
1419 RESERVE_SPACE(12); 1389 p = reserve_space(xdr, 12);
1420 WRITE32(OP_RENEW); 1390 *p++ = cpu_to_be32(OP_RENEW);
1421 WRITE64(client_stateid->cl_clientid); 1391 xdr_encode_hyper(p, client_stateid->cl_clientid);
1422 hdr->nops++; 1392 hdr->nops++;
1423 hdr->replen += decode_renew_maxsz; 1393 hdr->replen += decode_renew_maxsz;
1424} 1394}
@@ -1428,8 +1398,8 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1428{ 1398{
1429 __be32 *p; 1399 __be32 *p;
1430 1400
1431 RESERVE_SPACE(4); 1401 p = reserve_space(xdr, 4);
1432 WRITE32(OP_RESTOREFH); 1402 *p = cpu_to_be32(OP_RESTOREFH);
1433 hdr->nops++; 1403 hdr->nops++;
1434 hdr->replen += decode_restorefh_maxsz; 1404 hdr->replen += decode_restorefh_maxsz;
1435} 1405}
@@ -1439,16 +1409,16 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
1439{ 1409{
1440 __be32 *p; 1410 __be32 *p;
1441 1411
1442 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1412 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1443 WRITE32(OP_SETATTR); 1413 *p++ = cpu_to_be32(OP_SETATTR);
1444 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); 1414 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
1445 RESERVE_SPACE(2*4); 1415 p = reserve_space(xdr, 2*4);
1446 WRITE32(1); 1416 *p++ = cpu_to_be32(1);
1447 WRITE32(FATTR4_WORD0_ACL); 1417 *p = cpu_to_be32(FATTR4_WORD0_ACL);
1448 if (arg->acl_len % 4) 1418 if (arg->acl_len % 4)
1449 return -EINVAL; 1419 return -EINVAL;
1450 RESERVE_SPACE(4); 1420 p = reserve_space(xdr, 4);
1451 WRITE32(arg->acl_len); 1421 *p = cpu_to_be32(arg->acl_len);
1452 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); 1422 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
1453 hdr->nops++; 1423 hdr->nops++;
1454 hdr->replen += decode_setacl_maxsz; 1424 hdr->replen += decode_setacl_maxsz;
@@ -1460,8 +1430,8 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1460{ 1430{
1461 __be32 *p; 1431 __be32 *p;
1462 1432
1463 RESERVE_SPACE(4); 1433 p = reserve_space(xdr, 4);
1464 WRITE32(OP_SAVEFH); 1434 *p = cpu_to_be32(OP_SAVEFH);
1465 hdr->nops++; 1435 hdr->nops++;
1466 hdr->replen += decode_savefh_maxsz; 1436 hdr->replen += decode_savefh_maxsz;
1467} 1437}
@@ -1470,9 +1440,9 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
1470{ 1440{
1471 __be32 *p; 1441 __be32 *p;
1472 1442
1473 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1443 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1474 WRITE32(OP_SETATTR); 1444 *p++ = cpu_to_be32(OP_SETATTR);
1475 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); 1445 xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
1476 hdr->nops++; 1446 hdr->nops++;
1477 hdr->replen += decode_setattr_maxsz; 1447 hdr->replen += decode_setattr_maxsz;
1478 encode_attrs(xdr, arg->iap, server); 1448 encode_attrs(xdr, arg->iap, server);
@@ -1482,17 +1452,17 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1482{ 1452{
1483 __be32 *p; 1453 __be32 *p;
1484 1454
1485 RESERVE_SPACE(4 + NFS4_VERIFIER_SIZE); 1455 p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE);
1486 WRITE32(OP_SETCLIENTID); 1456 *p++ = cpu_to_be32(OP_SETCLIENTID);
1487 WRITEMEM(setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); 1457 xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
1488 1458
1489 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); 1459 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
1490 RESERVE_SPACE(4); 1460 p = reserve_space(xdr, 4);
1491 WRITE32(setclientid->sc_prog); 1461 *p = cpu_to_be32(setclientid->sc_prog);
1492 encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); 1462 encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
1493 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); 1463 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
1494 RESERVE_SPACE(4); 1464 p = reserve_space(xdr, 4);
1495 WRITE32(setclientid->sc_cb_ident); 1465 *p = cpu_to_be32(setclientid->sc_cb_ident);
1496 hdr->nops++; 1466 hdr->nops++;
1497 hdr->replen += decode_setclientid_maxsz; 1467 hdr->replen += decode_setclientid_maxsz;
1498} 1468}
@@ -1501,10 +1471,10 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_
1501{ 1471{
1502 __be32 *p; 1472 __be32 *p;
1503 1473
1504 RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE); 1474 p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
1505 WRITE32(OP_SETCLIENTID_CONFIRM); 1475 *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
1506 WRITE64(client_state->cl_clientid); 1476 p = xdr_encode_hyper(p, client_state->cl_clientid);
1507 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); 1477 xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
1508 hdr->nops++; 1478 hdr->nops++;
1509 hdr->replen += decode_setclientid_confirm_maxsz; 1479 hdr->replen += decode_setclientid_confirm_maxsz;
1510} 1480}
@@ -1513,15 +1483,15 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1513{ 1483{
1514 __be32 *p; 1484 __be32 *p;
1515 1485
1516 RESERVE_SPACE(4); 1486 p = reserve_space(xdr, 4);
1517 WRITE32(OP_WRITE); 1487 *p = cpu_to_be32(OP_WRITE);
1518 1488
1519 encode_stateid(xdr, args->context); 1489 encode_stateid(xdr, args->context);
1520 1490
1521 RESERVE_SPACE(16); 1491 p = reserve_space(xdr, 16);
1522 WRITE64(args->offset); 1492 p = xdr_encode_hyper(p, args->offset);
1523 WRITE32(args->stable); 1493 *p++ = cpu_to_be32(args->stable);
1524 WRITE32(args->count); 1494 *p = cpu_to_be32(args->count);
1525 1495
1526 xdr_write_pages(xdr, args->pages, args->pgbase, args->count); 1496 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
1527 hdr->nops++; 1497 hdr->nops++;
@@ -1532,10 +1502,10 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
1532{ 1502{
1533 __be32 *p; 1503 __be32 *p;
1534 1504
1535 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1505 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1536 1506
1537 WRITE32(OP_DELEGRETURN); 1507 *p++ = cpu_to_be32(OP_DELEGRETURN);
1538 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1508 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
1539 hdr->nops++; 1509 hdr->nops++;
1540 hdr->replen += decode_delegreturn_maxsz; 1510 hdr->replen += decode_delegreturn_maxsz;
1541} 1511}
@@ -1548,16 +1518,16 @@ static void encode_exchange_id(struct xdr_stream *xdr,
1548{ 1518{
1549 __be32 *p; 1519 __be32 *p;
1550 1520
1551 RESERVE_SPACE(4 + sizeof(args->verifier->data)); 1521 p = reserve_space(xdr, 4 + sizeof(args->verifier->data));
1552 WRITE32(OP_EXCHANGE_ID); 1522 *p++ = cpu_to_be32(OP_EXCHANGE_ID);
1553 WRITEMEM(args->verifier->data, sizeof(args->verifier->data)); 1523 xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data));
1554 1524
1555 encode_string(xdr, args->id_len, args->id); 1525 encode_string(xdr, args->id_len, args->id);
1556 1526
1557 RESERVE_SPACE(12); 1527 p = reserve_space(xdr, 12);
1558 WRITE32(args->flags); 1528 *p++ = cpu_to_be32(args->flags);
1559 WRITE32(0); /* zero length state_protect4_a */ 1529 *p++ = cpu_to_be32(0); /* zero length state_protect4_a */
1560 WRITE32(0); /* zero length implementation id array */ 1530 *p = cpu_to_be32(0); /* zero length implementation id array */
1561 hdr->nops++; 1531 hdr->nops++;
1562 hdr->replen += decode_exchange_id_maxsz; 1532 hdr->replen += decode_exchange_id_maxsz;
1563} 1533}
@@ -1571,55 +1541,43 @@ static void encode_create_session(struct xdr_stream *xdr,
1571 uint32_t len; 1541 uint32_t len;
1572 struct nfs_client *clp = args->client; 1542 struct nfs_client *clp = args->client;
1573 1543
1574 RESERVE_SPACE(4); 1544 len = scnprintf(machine_name, sizeof(machine_name), "%s",
1575 WRITE32(OP_CREATE_SESSION); 1545 clp->cl_ipaddr);
1576
1577 RESERVE_SPACE(8);
1578 WRITE64(clp->cl_ex_clid);
1579 1546
1580 RESERVE_SPACE(8); 1547 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1581 WRITE32(clp->cl_seqid); /*Sequence id */ 1548 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1582 WRITE32(args->flags); /*flags */ 1549 p = xdr_encode_hyper(p, clp->cl_ex_clid);
1550 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1551 *p++ = cpu_to_be32(args->flags); /*flags */
1583 1552
1584 RESERVE_SPACE(2*28); /* 2 channel_attrs */
1585 /* Fore Channel */ 1553 /* Fore Channel */
1586 WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ 1554 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
1587 WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */ 1555 *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */
1588 WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */ 1556 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */
1589 WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ 1557 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */
1590 WRITE32(args->fc_attrs.max_ops); /* max operations */ 1558 *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */
1591 WRITE32(args->fc_attrs.max_reqs); /* max requests */ 1559 *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */
1592 WRITE32(0); /* rdmachannel_attrs */ 1560 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
1593 1561
1594 /* Back Channel */ 1562 /* Back Channel */
1595 WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ 1563 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
1596 WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */ 1564 *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */
1597 WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */ 1565 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */
1598 WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ 1566 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
1599 WRITE32(args->bc_attrs.max_ops); /* max operations */ 1567 *p++ = cpu_to_be32(args->bc_attrs.max_ops); /* max operations */
1600 WRITE32(args->bc_attrs.max_reqs); /* max requests */ 1568 *p++ = cpu_to_be32(args->bc_attrs.max_reqs); /* max requests */
1601 WRITE32(0); /* rdmachannel_attrs */ 1569 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
1602 1570
1603 RESERVE_SPACE(4); 1571 *p++ = cpu_to_be32(args->cb_program); /* cb_program */
1604 WRITE32(args->cb_program); /* cb_program */ 1572 *p++ = cpu_to_be32(1);
1605 1573 *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
1606 RESERVE_SPACE(4); /* # of security flavors */
1607 WRITE32(1);
1608
1609 RESERVE_SPACE(4);
1610 WRITE32(RPC_AUTH_UNIX); /* auth_sys */
1611 1574
1612 /* authsys_parms rfc1831 */ 1575 /* authsys_parms rfc1831 */
1613 RESERVE_SPACE(4); 1576 *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
1614 WRITE32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ 1577 p = xdr_encode_opaque(p, machine_name, len);
1615 len = scnprintf(machine_name, sizeof(machine_name), "%s", 1578 *p++ = cpu_to_be32(0); /* UID */
1616 clp->cl_ipaddr); 1579 *p++ = cpu_to_be32(0); /* GID */
1617 RESERVE_SPACE(16 + len); 1580 *p = cpu_to_be32(0); /* No more gids */
1618 WRITE32(len);
1619 WRITEMEM(machine_name, len);
1620 WRITE32(0); /* UID */
1621 WRITE32(0); /* GID */
1622 WRITE32(0); /* No more gids */
1623 hdr->nops++; 1581 hdr->nops++;
1624 hdr->replen += decode_create_session_maxsz; 1582 hdr->replen += decode_create_session_maxsz;
1625} 1583}
@@ -1629,9 +1587,9 @@ static void encode_destroy_session(struct xdr_stream *xdr,
1629 struct compound_hdr *hdr) 1587 struct compound_hdr *hdr)
1630{ 1588{
1631 __be32 *p; 1589 __be32 *p;
1632 RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN); 1590 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN);
1633 WRITE32(OP_DESTROY_SESSION); 1591 *p++ = cpu_to_be32(OP_DESTROY_SESSION);
1634 WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1592 xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1635 hdr->nops++; 1593 hdr->nops++;
1636 hdr->replen += decode_destroy_session_maxsz; 1594 hdr->replen += decode_destroy_session_maxsz;
1637} 1595}
@@ -1655,8 +1613,8 @@ static void encode_sequence(struct xdr_stream *xdr,
1655 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); 1613 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
1656 slot = tp->slots + args->sa_slotid; 1614 slot = tp->slots + args->sa_slotid;
1657 1615
1658 RESERVE_SPACE(4); 1616 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16);
1659 WRITE32(OP_SEQUENCE); 1617 *p++ = cpu_to_be32(OP_SEQUENCE);
1660 1618
1661 /* 1619 /*
1662 * Sessionid + seqid + slotid + max slotid + cache_this 1620 * Sessionid + seqid + slotid + max slotid + cache_this
@@ -1670,12 +1628,11 @@ static void encode_sequence(struct xdr_stream *xdr,
1670 ((u32 *)session->sess_id.data)[3], 1628 ((u32 *)session->sess_id.data)[3],
1671 slot->seq_nr, args->sa_slotid, 1629 slot->seq_nr, args->sa_slotid,
1672 tp->highest_used_slotid, args->sa_cache_this); 1630 tp->highest_used_slotid, args->sa_cache_this);
1673 RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16); 1631 p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1674 WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1632 *p++ = cpu_to_be32(slot->seq_nr);
1675 WRITE32(slot->seq_nr); 1633 *p++ = cpu_to_be32(args->sa_slotid);
1676 WRITE32(args->sa_slotid); 1634 *p++ = cpu_to_be32(tp->highest_used_slotid);
1677 WRITE32(tp->highest_used_slotid); 1635 *p = cpu_to_be32(args->sa_cache_this);
1678 WRITE32(args->sa_cache_this);
1679 hdr->nops++; 1636 hdr->nops++;
1680 hdr->replen += decode_sequence_maxsz; 1637 hdr->replen += decode_sequence_maxsz;
1681#endif /* CONFIG_NFS_V4_1 */ 1638#endif /* CONFIG_NFS_V4_1 */
@@ -2466,68 +2423,53 @@ static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
2466} 2423}
2467#endif /* CONFIG_NFS_V4_1 */ 2424#endif /* CONFIG_NFS_V4_1 */
2468 2425
2469/* 2426static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
2470 * START OF "GENERIC" DECODE ROUTINES. 2427{
2471 * These may look a little ugly since they are imported from a "generic" 2428 dprintk("nfs: %s: prematurely hit end of receive buffer. "
2472 * set of XDR encode/decode routines which are intended to be shared by 2429 "Remaining buffer length is %tu words.\n",
2473 * all of our NFSv4 implementations (OpenBSD, MacOS X...). 2430 func, xdr->end - xdr->p);
2474 * 2431}
2475 * If the pain of reading these is too great, it should be a straightforward
2476 * task to translate them into Linux-specific versions which are more
2477 * consistent with the style used in NFSv2/v3...
2478 */
2479#define READ32(x) (x) = ntohl(*p++)
2480#define READ64(x) do { \
2481 (x) = (u64)ntohl(*p++) << 32; \
2482 (x) |= ntohl(*p++); \
2483} while (0)
2484#define READTIME(x) do { \
2485 p++; \
2486 (x.tv_sec) = ntohl(*p++); \
2487 (x.tv_nsec) = ntohl(*p++); \
2488} while (0)
2489#define COPYMEM(x,nbytes) do { \
2490 memcpy((x), p, nbytes); \
2491 p += XDR_QUADLEN(nbytes); \
2492} while (0)
2493
2494#define READ_BUF(nbytes) do { \
2495 p = xdr_inline_decode(xdr, nbytes); \
2496 if (unlikely(!p)) { \
2497 dprintk("nfs: %s: prematurely hit end of receive" \
2498 " buffer\n", __func__); \
2499 dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
2500 __func__, xdr->p, nbytes, xdr->end); \
2501 return -EIO; \
2502 } \
2503} while (0)
2504 2432
2505static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) 2433static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
2506{ 2434{
2507 __be32 *p; 2435 __be32 *p;
2508 2436
2509 READ_BUF(4); 2437 p = xdr_inline_decode(xdr, 4);
2510 READ32(*len); 2438 if (unlikely(!p))
2511 READ_BUF(*len); 2439 goto out_overflow;
2440 *len = be32_to_cpup(p);
2441 p = xdr_inline_decode(xdr, *len);
2442 if (unlikely(!p))
2443 goto out_overflow;
2512 *string = (char *)p; 2444 *string = (char *)p;
2513 return 0; 2445 return 0;
2446out_overflow:
2447 print_overflow_msg(__func__, xdr);
2448 return -EIO;
2514} 2449}
2515 2450
2516static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) 2451static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
2517{ 2452{
2518 __be32 *p; 2453 __be32 *p;
2519 2454
2520 READ_BUF(8); 2455 p = xdr_inline_decode(xdr, 8);
2521 READ32(hdr->status); 2456 if (unlikely(!p))
2522 READ32(hdr->taglen); 2457 goto out_overflow;
2458 hdr->status = be32_to_cpup(p++);
2459 hdr->taglen = be32_to_cpup(p);
2523 2460
2524 READ_BUF(hdr->taglen + 4); 2461 p = xdr_inline_decode(xdr, hdr->taglen + 4);
2462 if (unlikely(!p))
2463 goto out_overflow;
2525 hdr->tag = (char *)p; 2464 hdr->tag = (char *)p;
2526 p += XDR_QUADLEN(hdr->taglen); 2465 p += XDR_QUADLEN(hdr->taglen);
2527 READ32(hdr->nops); 2466 hdr->nops = be32_to_cpup(p);
2528 if (unlikely(hdr->nops < 1)) 2467 if (unlikely(hdr->nops < 1))
2529 return nfs4_stat_to_errno(hdr->status); 2468 return nfs4_stat_to_errno(hdr->status);
2530 return 0; 2469 return 0;
2470out_overflow:
2471 print_overflow_msg(__func__, xdr);
2472 return -EIO;
2531} 2473}
2532 2474
2533static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) 2475static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
@@ -2536,18 +2478,23 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
2536 uint32_t opnum; 2478 uint32_t opnum;
2537 int32_t nfserr; 2479 int32_t nfserr;
2538 2480
2539 READ_BUF(8); 2481 p = xdr_inline_decode(xdr, 8);
2540 READ32(opnum); 2482 if (unlikely(!p))
2483 goto out_overflow;
2484 opnum = be32_to_cpup(p++);
2541 if (opnum != expected) { 2485 if (opnum != expected) {
2542 dprintk("nfs: Server returned operation" 2486 dprintk("nfs: Server returned operation"
2543 " %d but we issued a request for %d\n", 2487 " %d but we issued a request for %d\n",
2544 opnum, expected); 2488 opnum, expected);
2545 return -EIO; 2489 return -EIO;
2546 } 2490 }
2547 READ32(nfserr); 2491 nfserr = be32_to_cpup(p);
2548 if (nfserr != NFS_OK) 2492 if (nfserr != NFS_OK)
2549 return nfs4_stat_to_errno(nfserr); 2493 return nfs4_stat_to_errno(nfserr);
2550 return 0; 2494 return 0;
2495out_overflow:
2496 print_overflow_msg(__func__, xdr);
2497 return -EIO;
2551} 2498}
2552 2499
2553/* Dummy routine */ 2500/* Dummy routine */
@@ -2557,8 +2504,11 @@ static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
2557 unsigned int strlen; 2504 unsigned int strlen;
2558 char *str; 2505 char *str;
2559 2506
2560 READ_BUF(12); 2507 p = xdr_inline_decode(xdr, 12);
2561 return decode_opaque_inline(xdr, &strlen, &str); 2508 if (likely(p))
2509 return decode_opaque_inline(xdr, &strlen, &str);
2510 print_overflow_msg(__func__, xdr);
2511 return -EIO;
2562} 2512}
2563 2513
2564static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) 2514static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
@@ -2566,27 +2516,39 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
2566 uint32_t bmlen; 2516 uint32_t bmlen;
2567 __be32 *p; 2517 __be32 *p;
2568 2518
2569 READ_BUF(4); 2519 p = xdr_inline_decode(xdr, 4);
2570 READ32(bmlen); 2520 if (unlikely(!p))
2521 goto out_overflow;
2522 bmlen = be32_to_cpup(p);
2571 2523
2572 bitmap[0] = bitmap[1] = 0; 2524 bitmap[0] = bitmap[1] = 0;
2573 READ_BUF((bmlen << 2)); 2525 p = xdr_inline_decode(xdr, (bmlen << 2));
2526 if (unlikely(!p))
2527 goto out_overflow;
2574 if (bmlen > 0) { 2528 if (bmlen > 0) {
2575 READ32(bitmap[0]); 2529 bitmap[0] = be32_to_cpup(p++);
2576 if (bmlen > 1) 2530 if (bmlen > 1)
2577 READ32(bitmap[1]); 2531 bitmap[1] = be32_to_cpup(p);
2578 } 2532 }
2579 return 0; 2533 return 0;
2534out_overflow:
2535 print_overflow_msg(__func__, xdr);
2536 return -EIO;
2580} 2537}
2581 2538
2582static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep) 2539static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep)
2583{ 2540{
2584 __be32 *p; 2541 __be32 *p;
2585 2542
2586 READ_BUF(4); 2543 p = xdr_inline_decode(xdr, 4);
2587 READ32(*attrlen); 2544 if (unlikely(!p))
2545 goto out_overflow;
2546 *attrlen = be32_to_cpup(p);
2588 *savep = xdr->p; 2547 *savep = xdr->p;
2589 return 0; 2548 return 0;
2549out_overflow:
2550 print_overflow_msg(__func__, xdr);
2551 return -EIO;
2590} 2552}
2591 2553
2592static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) 2554static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
@@ -2609,8 +2571,10 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
2609 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) 2571 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
2610 return -EIO; 2572 return -EIO;
2611 if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) { 2573 if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) {
2612 READ_BUF(4); 2574 p = xdr_inline_decode(xdr, 4);
2613 READ32(*type); 2575 if (unlikely(!p))
2576 goto out_overflow;
2577 *type = be32_to_cpup(p);
2614 if (*type < NF4REG || *type > NF4NAMEDATTR) { 2578 if (*type < NF4REG || *type > NF4NAMEDATTR) {
2615 dprintk("%s: bad type %d\n", __func__, *type); 2579 dprintk("%s: bad type %d\n", __func__, *type);
2616 return -EIO; 2580 return -EIO;
@@ -2620,6 +2584,9 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
2620 } 2584 }
2621 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]); 2585 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
2622 return ret; 2586 return ret;
2587out_overflow:
2588 print_overflow_msg(__func__, xdr);
2589 return -EIO;
2623} 2590}
2624 2591
2625static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) 2592static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
@@ -2631,14 +2598,19 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2631 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) 2598 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
2632 return -EIO; 2599 return -EIO;
2633 if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) { 2600 if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) {
2634 READ_BUF(8); 2601 p = xdr_inline_decode(xdr, 8);
2635 READ64(*change); 2602 if (unlikely(!p))
2603 goto out_overflow;
2604 xdr_decode_hyper(p, change);
2636 bitmap[0] &= ~FATTR4_WORD0_CHANGE; 2605 bitmap[0] &= ~FATTR4_WORD0_CHANGE;
2637 ret = NFS_ATTR_FATTR_CHANGE; 2606 ret = NFS_ATTR_FATTR_CHANGE;
2638 } 2607 }
2639 dprintk("%s: change attribute=%Lu\n", __func__, 2608 dprintk("%s: change attribute=%Lu\n", __func__,
2640 (unsigned long long)*change); 2609 (unsigned long long)*change);
2641 return ret; 2610 return ret;
2611out_overflow:
2612 print_overflow_msg(__func__, xdr);
2613 return -EIO;
2642} 2614}
2643 2615
2644static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) 2616static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
@@ -2650,13 +2622,18 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
2650 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) 2622 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
2651 return -EIO; 2623 return -EIO;
2652 if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) { 2624 if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) {
2653 READ_BUF(8); 2625 p = xdr_inline_decode(xdr, 8);
2654 READ64(*size); 2626 if (unlikely(!p))
2627 goto out_overflow;
2628 xdr_decode_hyper(p, size);
2655 bitmap[0] &= ~FATTR4_WORD0_SIZE; 2629 bitmap[0] &= ~FATTR4_WORD0_SIZE;
2656 ret = NFS_ATTR_FATTR_SIZE; 2630 ret = NFS_ATTR_FATTR_SIZE;
2657 } 2631 }
2658 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); 2632 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
2659 return ret; 2633 return ret;
2634out_overflow:
2635 print_overflow_msg(__func__, xdr);
2636 return -EIO;
2660} 2637}
2661 2638
2662static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2639static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2667,12 +2644,17 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
2667 if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U))) 2644 if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)))
2668 return -EIO; 2645 return -EIO;
2669 if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) { 2646 if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
2670 READ_BUF(4); 2647 p = xdr_inline_decode(xdr, 4);
2671 READ32(*res); 2648 if (unlikely(!p))
2649 goto out_overflow;
2650 *res = be32_to_cpup(p);
2672 bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; 2651 bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
2673 } 2652 }
2674 dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true"); 2653 dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
2675 return 0; 2654 return 0;
2655out_overflow:
2656 print_overflow_msg(__func__, xdr);
2657 return -EIO;
2676} 2658}
2677 2659
2678static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2660static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2683,12 +2665,17 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
2683 if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U))) 2665 if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)))
2684 return -EIO; 2666 return -EIO;
2685 if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) { 2667 if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
2686 READ_BUF(4); 2668 p = xdr_inline_decode(xdr, 4);
2687 READ32(*res); 2669 if (unlikely(!p))
2670 goto out_overflow;
2671 *res = be32_to_cpup(p);
2688 bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; 2672 bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
2689 } 2673 }
2690 dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); 2674 dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
2691 return 0; 2675 return 0;
2676out_overflow:
2677 print_overflow_msg(__func__, xdr);
2678 return -EIO;
2692} 2679}
2693 2680
2694static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) 2681static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
@@ -2701,9 +2688,11 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
2701 if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U))) 2688 if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U)))
2702 return -EIO; 2689 return -EIO;
2703 if (likely(bitmap[0] & FATTR4_WORD0_FSID)) { 2690 if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
2704 READ_BUF(16); 2691 p = xdr_inline_decode(xdr, 16);
2705 READ64(fsid->major); 2692 if (unlikely(!p))
2706 READ64(fsid->minor); 2693 goto out_overflow;
2694 p = xdr_decode_hyper(p, &fsid->major);
2695 xdr_decode_hyper(p, &fsid->minor);
2707 bitmap[0] &= ~FATTR4_WORD0_FSID; 2696 bitmap[0] &= ~FATTR4_WORD0_FSID;
2708 ret = NFS_ATTR_FATTR_FSID; 2697 ret = NFS_ATTR_FATTR_FSID;
2709 } 2698 }
@@ -2711,6 +2700,9 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
2711 (unsigned long long)fsid->major, 2700 (unsigned long long)fsid->major,
2712 (unsigned long long)fsid->minor); 2701 (unsigned long long)fsid->minor);
2713 return ret; 2702 return ret;
2703out_overflow:
2704 print_overflow_msg(__func__, xdr);
2705 return -EIO;
2714} 2706}
2715 2707
2716static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2708static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2721,12 +2713,17 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint
2721 if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U))) 2713 if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
2722 return -EIO; 2714 return -EIO;
2723 if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) { 2715 if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
2724 READ_BUF(4); 2716 p = xdr_inline_decode(xdr, 4);
2725 READ32(*res); 2717 if (unlikely(!p))
2718 goto out_overflow;
2719 *res = be32_to_cpup(p);
2726 bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; 2720 bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
2727 } 2721 }
2728 dprintk("%s: file size=%u\n", __func__, (unsigned int)*res); 2722 dprintk("%s: file size=%u\n", __func__, (unsigned int)*res);
2729 return 0; 2723 return 0;
2724out_overflow:
2725 print_overflow_msg(__func__, xdr);
2726 return -EIO;
2730} 2727}
2731 2728
2732static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2729static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2737,12 +2734,17 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
2737 if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U))) 2734 if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
2738 return -EIO; 2735 return -EIO;
2739 if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) { 2736 if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
2740 READ_BUF(4); 2737 p = xdr_inline_decode(xdr, 4);
2741 READ32(*res); 2738 if (unlikely(!p))
2739 goto out_overflow;
2740 *res = be32_to_cpup(p);
2742 bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT; 2741 bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
2743 } 2742 }
2744 dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res); 2743 dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res);
2745 return 0; 2744 return 0;
2745out_overflow:
2746 print_overflow_msg(__func__, xdr);
2747 return -EIO;
2746} 2748}
2747 2749
2748static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2750static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2754,13 +2756,18 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2754 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) 2756 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
2755 return -EIO; 2757 return -EIO;
2756 if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) { 2758 if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) {
2757 READ_BUF(8); 2759 p = xdr_inline_decode(xdr, 8);
2758 READ64(*fileid); 2760 if (unlikely(!p))
2761 goto out_overflow;
2762 xdr_decode_hyper(p, fileid);
2759 bitmap[0] &= ~FATTR4_WORD0_FILEID; 2763 bitmap[0] &= ~FATTR4_WORD0_FILEID;
2760 ret = NFS_ATTR_FATTR_FILEID; 2764 ret = NFS_ATTR_FATTR_FILEID;
2761 } 2765 }
2762 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2766 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2763 return ret; 2767 return ret;
2768out_overflow:
2769 print_overflow_msg(__func__, xdr);
2770 return -EIO;
2764} 2771}
2765 2772
2766static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2773static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2772,13 +2779,18 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
2772 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) 2779 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
2773 return -EIO; 2780 return -EIO;
2774 if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) { 2781 if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) {
2775 READ_BUF(8); 2782 p = xdr_inline_decode(xdr, 8);
2776 READ64(*fileid); 2783 if (unlikely(!p))
2784 goto out_overflow;
2785 xdr_decode_hyper(p, fileid);
2777 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 2786 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
2778 ret = NFS_ATTR_FATTR_FILEID; 2787 ret = NFS_ATTR_FATTR_FILEID;
2779 } 2788 }
2780 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2789 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2781 return ret; 2790 return ret;
2791out_overflow:
2792 print_overflow_msg(__func__, xdr);
2793 return -EIO;
2782} 2794}
2783 2795
2784static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2796static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2790,12 +2802,17 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
2790 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U))) 2802 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U)))
2791 return -EIO; 2803 return -EIO;
2792 if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) { 2804 if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) {
2793 READ_BUF(8); 2805 p = xdr_inline_decode(xdr, 8);
2794 READ64(*res); 2806 if (unlikely(!p))
2807 goto out_overflow;
2808 xdr_decode_hyper(p, res);
2795 bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL; 2809 bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
2796 } 2810 }
2797 dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res); 2811 dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res);
2798 return status; 2812 return status;
2813out_overflow:
2814 print_overflow_msg(__func__, xdr);
2815 return -EIO;
2799} 2816}
2800 2817
2801static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2818static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2807,12 +2824,17 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
2807 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U))) 2824 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U)))
2808 return -EIO; 2825 return -EIO;
2809 if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) { 2826 if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) {
2810 READ_BUF(8); 2827 p = xdr_inline_decode(xdr, 8);
2811 READ64(*res); 2828 if (unlikely(!p))
2829 goto out_overflow;
2830 xdr_decode_hyper(p, res);
2812 bitmap[0] &= ~FATTR4_WORD0_FILES_FREE; 2831 bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
2813 } 2832 }
2814 dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res); 2833 dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res);
2815 return status; 2834 return status;
2835out_overflow:
2836 print_overflow_msg(__func__, xdr);
2837 return -EIO;
2816} 2838}
2817 2839
2818static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2840static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2824,12 +2846,17 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
2824 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U))) 2846 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U)))
2825 return -EIO; 2847 return -EIO;
2826 if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) { 2848 if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) {
2827 READ_BUF(8); 2849 p = xdr_inline_decode(xdr, 8);
2828 READ64(*res); 2850 if (unlikely(!p))
2851 goto out_overflow;
2852 xdr_decode_hyper(p, res);
2829 bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL; 2853 bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
2830 } 2854 }
2831 dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res); 2855 dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res);
2832 return status; 2856 return status;
2857out_overflow:
2858 print_overflow_msg(__func__, xdr);
2859 return -EIO;
2833} 2860}
2834 2861
2835static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) 2862static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
@@ -2838,8 +2865,10 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
2838 __be32 *p; 2865 __be32 *p;
2839 int status = 0; 2866 int status = 0;
2840 2867
2841 READ_BUF(4); 2868 p = xdr_inline_decode(xdr, 4);
2842 READ32(n); 2869 if (unlikely(!p))
2870 goto out_overflow;
2871 n = be32_to_cpup(p);
2843 if (n == 0) 2872 if (n == 0)
2844 goto root_path; 2873 goto root_path;
2845 dprintk("path "); 2874 dprintk("path ");
@@ -2873,6 +2902,9 @@ out_eio:
2873 dprintk(" status %d", status); 2902 dprintk(" status %d", status);
2874 status = -EIO; 2903 status = -EIO;
2875 goto out; 2904 goto out;
2905out_overflow:
2906 print_overflow_msg(__func__, xdr);
2907 return -EIO;
2876} 2908}
2877 2909
2878static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res) 2910static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
@@ -2890,8 +2922,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2890 status = decode_pathname(xdr, &res->fs_path); 2922 status = decode_pathname(xdr, &res->fs_path);
2891 if (unlikely(status != 0)) 2923 if (unlikely(status != 0))
2892 goto out; 2924 goto out;
2893 READ_BUF(4); 2925 p = xdr_inline_decode(xdr, 4);
2894 READ32(n); 2926 if (unlikely(!p))
2927 goto out_overflow;
2928 n = be32_to_cpup(p);
2895 if (n <= 0) 2929 if (n <= 0)
2896 goto out_eio; 2930 goto out_eio;
2897 res->nlocations = 0; 2931 res->nlocations = 0;
@@ -2899,8 +2933,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2899 u32 m; 2933 u32 m;
2900 struct nfs4_fs_location *loc = &res->locations[res->nlocations]; 2934 struct nfs4_fs_location *loc = &res->locations[res->nlocations];
2901 2935
2902 READ_BUF(4); 2936 p = xdr_inline_decode(xdr, 4);
2903 READ32(m); 2937 if (unlikely(!p))
2938 goto out_overflow;
2939 m = be32_to_cpup(p);
2904 2940
2905 loc->nservers = 0; 2941 loc->nservers = 0;
2906 dprintk("%s: servers ", __func__); 2942 dprintk("%s: servers ", __func__);
@@ -2939,6 +2975,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2939out: 2975out:
2940 dprintk("%s: fs_locations done, error = %d\n", __func__, status); 2976 dprintk("%s: fs_locations done, error = %d\n", __func__, status);
2941 return status; 2977 return status;
2978out_overflow:
2979 print_overflow_msg(__func__, xdr);
2942out_eio: 2980out_eio:
2943 status = -EIO; 2981 status = -EIO;
2944 goto out; 2982 goto out;
@@ -2953,12 +2991,17 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin
2953 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U))) 2991 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U)))
2954 return -EIO; 2992 return -EIO;
2955 if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) { 2993 if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) {
2956 READ_BUF(8); 2994 p = xdr_inline_decode(xdr, 8);
2957 READ64(*res); 2995 if (unlikely(!p))
2996 goto out_overflow;
2997 xdr_decode_hyper(p, res);
2958 bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE; 2998 bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
2959 } 2999 }
2960 dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res); 3000 dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res);
2961 return status; 3001 return status;
3002out_overflow:
3003 print_overflow_msg(__func__, xdr);
3004 return -EIO;
2962} 3005}
2963 3006
2964static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink) 3007static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
@@ -2970,12 +3013,17 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
2970 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U))) 3013 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U)))
2971 return -EIO; 3014 return -EIO;
2972 if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) { 3015 if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) {
2973 READ_BUF(4); 3016 p = xdr_inline_decode(xdr, 4);
2974 READ32(*maxlink); 3017 if (unlikely(!p))
3018 goto out_overflow;
3019 *maxlink = be32_to_cpup(p);
2975 bitmap[0] &= ~FATTR4_WORD0_MAXLINK; 3020 bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
2976 } 3021 }
2977 dprintk("%s: maxlink=%u\n", __func__, *maxlink); 3022 dprintk("%s: maxlink=%u\n", __func__, *maxlink);
2978 return status; 3023 return status;
3024out_overflow:
3025 print_overflow_msg(__func__, xdr);
3026 return -EIO;
2979} 3027}
2980 3028
2981static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname) 3029static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
@@ -2987,12 +3035,17 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
2987 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U))) 3035 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U)))
2988 return -EIO; 3036 return -EIO;
2989 if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) { 3037 if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) {
2990 READ_BUF(4); 3038 p = xdr_inline_decode(xdr, 4);
2991 READ32(*maxname); 3039 if (unlikely(!p))
3040 goto out_overflow;
3041 *maxname = be32_to_cpup(p);
2992 bitmap[0] &= ~FATTR4_WORD0_MAXNAME; 3042 bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
2993 } 3043 }
2994 dprintk("%s: maxname=%u\n", __func__, *maxname); 3044 dprintk("%s: maxname=%u\n", __func__, *maxname);
2995 return status; 3045 return status;
3046out_overflow:
3047 print_overflow_msg(__func__, xdr);
3048 return -EIO;
2996} 3049}
2997 3050
2998static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3051static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3005,8 +3058,10 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
3005 return -EIO; 3058 return -EIO;
3006 if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) { 3059 if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) {
3007 uint64_t maxread; 3060 uint64_t maxread;
3008 READ_BUF(8); 3061 p = xdr_inline_decode(xdr, 8);
3009 READ64(maxread); 3062 if (unlikely(!p))
3063 goto out_overflow;
3064 xdr_decode_hyper(p, &maxread);
3010 if (maxread > 0x7FFFFFFF) 3065 if (maxread > 0x7FFFFFFF)
3011 maxread = 0x7FFFFFFF; 3066 maxread = 0x7FFFFFFF;
3012 *res = (uint32_t)maxread; 3067 *res = (uint32_t)maxread;
@@ -3014,6 +3069,9 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
3014 } 3069 }
3015 dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res); 3070 dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res);
3016 return status; 3071 return status;
3072out_overflow:
3073 print_overflow_msg(__func__, xdr);
3074 return -EIO;
3017} 3075}
3018 3076
3019static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3077static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3026,8 +3084,10 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
3026 return -EIO; 3084 return -EIO;
3027 if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) { 3085 if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) {
3028 uint64_t maxwrite; 3086 uint64_t maxwrite;
3029 READ_BUF(8); 3087 p = xdr_inline_decode(xdr, 8);
3030 READ64(maxwrite); 3088 if (unlikely(!p))
3089 goto out_overflow;
3090 xdr_decode_hyper(p, &maxwrite);
3031 if (maxwrite > 0x7FFFFFFF) 3091 if (maxwrite > 0x7FFFFFFF)
3032 maxwrite = 0x7FFFFFFF; 3092 maxwrite = 0x7FFFFFFF;
3033 *res = (uint32_t)maxwrite; 3093 *res = (uint32_t)maxwrite;
@@ -3035,6 +3095,9 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
3035 } 3095 }
3036 dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res); 3096 dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res);
3037 return status; 3097 return status;
3098out_overflow:
3099 print_overflow_msg(__func__, xdr);
3100 return -EIO;
3038} 3101}
3039 3102
3040static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode) 3103static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
@@ -3047,14 +3110,19 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m
3047 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) 3110 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
3048 return -EIO; 3111 return -EIO;
3049 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { 3112 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
3050 READ_BUF(4); 3113 p = xdr_inline_decode(xdr, 4);
3051 READ32(tmp); 3114 if (unlikely(!p))
3115 goto out_overflow;
3116 tmp = be32_to_cpup(p);
3052 *mode = tmp & ~S_IFMT; 3117 *mode = tmp & ~S_IFMT;
3053 bitmap[1] &= ~FATTR4_WORD1_MODE; 3118 bitmap[1] &= ~FATTR4_WORD1_MODE;
3054 ret = NFS_ATTR_FATTR_MODE; 3119 ret = NFS_ATTR_FATTR_MODE;
3055 } 3120 }
3056 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); 3121 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
3057 return ret; 3122 return ret;
3123out_overflow:
3124 print_overflow_msg(__func__, xdr);
3125 return -EIO;
3058} 3126}
3059 3127
3060static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) 3128static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
@@ -3066,16 +3134,22 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
3066 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) 3134 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
3067 return -EIO; 3135 return -EIO;
3068 if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) { 3136 if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) {
3069 READ_BUF(4); 3137 p = xdr_inline_decode(xdr, 4);
3070 READ32(*nlink); 3138 if (unlikely(!p))
3139 goto out_overflow;
3140 *nlink = be32_to_cpup(p);
3071 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; 3141 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
3072 ret = NFS_ATTR_FATTR_NLINK; 3142 ret = NFS_ATTR_FATTR_NLINK;
3073 } 3143 }
3074 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); 3144 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
3075 return ret; 3145 return ret;
3146out_overflow:
3147 print_overflow_msg(__func__, xdr);
3148 return -EIO;
3076} 3149}
3077 3150
3078static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) 3151static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3152 struct nfs_client *clp, uint32_t *uid, int may_sleep)
3079{ 3153{
3080 uint32_t len; 3154 uint32_t len;
3081 __be32 *p; 3155 __be32 *p;
@@ -3085,10 +3159,16 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3085 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) 3159 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
3086 return -EIO; 3160 return -EIO;
3087 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { 3161 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
3088 READ_BUF(4); 3162 p = xdr_inline_decode(xdr, 4);
3089 READ32(len); 3163 if (unlikely(!p))
3090 READ_BUF(len); 3164 goto out_overflow;
3091 if (len < XDR_MAX_NETOBJ) { 3165 len = be32_to_cpup(p);
3166 p = xdr_inline_decode(xdr, len);
3167 if (unlikely(!p))
3168 goto out_overflow;
3169 if (!may_sleep) {
3170 /* do nothing */
3171 } else if (len < XDR_MAX_NETOBJ) {
3092 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) 3172 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0)
3093 ret = NFS_ATTR_FATTR_OWNER; 3173 ret = NFS_ATTR_FATTR_OWNER;
3094 else 3174 else
@@ -3101,9 +3181,13 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3101 } 3181 }
3102 dprintk("%s: uid=%d\n", __func__, (int)*uid); 3182 dprintk("%s: uid=%d\n", __func__, (int)*uid);
3103 return ret; 3183 return ret;
3184out_overflow:
3185 print_overflow_msg(__func__, xdr);
3186 return -EIO;
3104} 3187}
3105 3188
3106static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) 3189static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3190 struct nfs_client *clp, uint32_t *gid, int may_sleep)
3107{ 3191{
3108 uint32_t len; 3192 uint32_t len;
3109 __be32 *p; 3193 __be32 *p;
@@ -3113,10 +3197,16 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3113 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) 3197 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
3114 return -EIO; 3198 return -EIO;
3115 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { 3199 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
3116 READ_BUF(4); 3200 p = xdr_inline_decode(xdr, 4);
3117 READ32(len); 3201 if (unlikely(!p))
3118 READ_BUF(len); 3202 goto out_overflow;
3119 if (len < XDR_MAX_NETOBJ) { 3203 len = be32_to_cpup(p);
3204 p = xdr_inline_decode(xdr, len);
3205 if (unlikely(!p))
3206 goto out_overflow;
3207 if (!may_sleep) {
3208 /* do nothing */
3209 } else if (len < XDR_MAX_NETOBJ) {
3120 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) 3210 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0)
3121 ret = NFS_ATTR_FATTR_GROUP; 3211 ret = NFS_ATTR_FATTR_GROUP;
3122 else 3212 else
@@ -3129,6 +3219,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3129 } 3219 }
3130 dprintk("%s: gid=%d\n", __func__, (int)*gid); 3220 dprintk("%s: gid=%d\n", __func__, (int)*gid);
3131 return ret; 3221 return ret;
3222out_overflow:
3223 print_overflow_msg(__func__, xdr);
3224 return -EIO;
3132} 3225}
3133 3226
3134static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) 3227static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
@@ -3143,9 +3236,11 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
3143 if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) { 3236 if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) {
3144 dev_t tmp; 3237 dev_t tmp;
3145 3238
3146 READ_BUF(8); 3239 p = xdr_inline_decode(xdr, 8);
3147 READ32(major); 3240 if (unlikely(!p))
3148 READ32(minor); 3241 goto out_overflow;
3242 major = be32_to_cpup(p++);
3243 minor = be32_to_cpup(p);
3149 tmp = MKDEV(major, minor); 3244 tmp = MKDEV(major, minor);
3150 if (MAJOR(tmp) == major && MINOR(tmp) == minor) 3245 if (MAJOR(tmp) == major && MINOR(tmp) == minor)
3151 *rdev = tmp; 3246 *rdev = tmp;
@@ -3154,6 +3249,9 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
3154 } 3249 }
3155 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); 3250 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
3156 return ret; 3251 return ret;
3252out_overflow:
3253 print_overflow_msg(__func__, xdr);
3254 return -EIO;
3157} 3255}
3158 3256
3159static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3257static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3165,12 +3263,17 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
3165 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U))) 3263 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U)))
3166 return -EIO; 3264 return -EIO;
3167 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) { 3265 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) {
3168 READ_BUF(8); 3266 p = xdr_inline_decode(xdr, 8);
3169 READ64(*res); 3267 if (unlikely(!p))
3268 goto out_overflow;
3269 xdr_decode_hyper(p, res);
3170 bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL; 3270 bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
3171 } 3271 }
3172 dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res); 3272 dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res);
3173 return status; 3273 return status;
3274out_overflow:
3275 print_overflow_msg(__func__, xdr);
3276 return -EIO;
3174} 3277}
3175 3278
3176static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3279static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3182,12 +3285,17 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
3182 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U))) 3285 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U)))
3183 return -EIO; 3286 return -EIO;
3184 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) { 3287 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) {
3185 READ_BUF(8); 3288 p = xdr_inline_decode(xdr, 8);
3186 READ64(*res); 3289 if (unlikely(!p))
3290 goto out_overflow;
3291 xdr_decode_hyper(p, res);
3187 bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE; 3292 bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
3188 } 3293 }
3189 dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res); 3294 dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res);
3190 return status; 3295 return status;
3296out_overflow:
3297 print_overflow_msg(__func__, xdr);
3298 return -EIO;
3191} 3299}
3192 3300
3193static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3301static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3199,12 +3307,17 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
3199 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U))) 3307 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U)))
3200 return -EIO; 3308 return -EIO;
3201 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) { 3309 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) {
3202 READ_BUF(8); 3310 p = xdr_inline_decode(xdr, 8);
3203 READ64(*res); 3311 if (unlikely(!p))
3312 goto out_overflow;
3313 xdr_decode_hyper(p, res);
3204 bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL; 3314 bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
3205 } 3315 }
3206 dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res); 3316 dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res);
3207 return status; 3317 return status;
3318out_overflow:
3319 print_overflow_msg(__func__, xdr);
3320 return -EIO;
3208} 3321}
3209 3322
3210static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) 3323static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
@@ -3216,14 +3329,19 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
3216 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) 3329 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
3217 return -EIO; 3330 return -EIO;
3218 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) { 3331 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) {
3219 READ_BUF(8); 3332 p = xdr_inline_decode(xdr, 8);
3220 READ64(*used); 3333 if (unlikely(!p))
3334 goto out_overflow;
3335 xdr_decode_hyper(p, used);
3221 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; 3336 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
3222 ret = NFS_ATTR_FATTR_SPACE_USED; 3337 ret = NFS_ATTR_FATTR_SPACE_USED;
3223 } 3338 }
3224 dprintk("%s: space used=%Lu\n", __func__, 3339 dprintk("%s: space used=%Lu\n", __func__,
3225 (unsigned long long)*used); 3340 (unsigned long long)*used);
3226 return ret; 3341 return ret;
3342out_overflow:
3343 print_overflow_msg(__func__, xdr);
3344 return -EIO;
3227} 3345}
3228 3346
3229static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) 3347static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
@@ -3232,12 +3350,17 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
3232 uint64_t sec; 3350 uint64_t sec;
3233 uint32_t nsec; 3351 uint32_t nsec;
3234 3352
3235 READ_BUF(12); 3353 p = xdr_inline_decode(xdr, 12);
3236 READ64(sec); 3354 if (unlikely(!p))
3237 READ32(nsec); 3355 goto out_overflow;
3356 p = xdr_decode_hyper(p, &sec);
3357 nsec = be32_to_cpup(p);
3238 time->tv_sec = (time_t)sec; 3358 time->tv_sec = (time_t)sec;
3239 time->tv_nsec = (long)nsec; 3359 time->tv_nsec = (long)nsec;
3240 return 0; 3360 return 0;
3361out_overflow:
3362 print_overflow_msg(__func__, xdr);
3363 return -EIO;
3241} 3364}
3242 3365
3243static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 3366static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
@@ -3315,11 +3438,16 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c
3315{ 3438{
3316 __be32 *p; 3439 __be32 *p;
3317 3440
3318 READ_BUF(20); 3441 p = xdr_inline_decode(xdr, 20);
3319 READ32(cinfo->atomic); 3442 if (unlikely(!p))
3320 READ64(cinfo->before); 3443 goto out_overflow;
3321 READ64(cinfo->after); 3444 cinfo->atomic = be32_to_cpup(p++);
3445 p = xdr_decode_hyper(p, &cinfo->before);
3446 xdr_decode_hyper(p, &cinfo->after);
3322 return 0; 3447 return 0;
3448out_overflow:
3449 print_overflow_msg(__func__, xdr);
3450 return -EIO;
3323} 3451}
3324 3452
3325static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) 3453static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
@@ -3331,40 +3459,62 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
3331 status = decode_op_hdr(xdr, OP_ACCESS); 3459 status = decode_op_hdr(xdr, OP_ACCESS);
3332 if (status) 3460 if (status)
3333 return status; 3461 return status;
3334 READ_BUF(8); 3462 p = xdr_inline_decode(xdr, 8);
3335 READ32(supp); 3463 if (unlikely(!p))
3336 READ32(acc); 3464 goto out_overflow;
3465 supp = be32_to_cpup(p++);
3466 acc = be32_to_cpup(p);
3337 access->supported = supp; 3467 access->supported = supp;
3338 access->access = acc; 3468 access->access = acc;
3339 return 0; 3469 return 0;
3470out_overflow:
3471 print_overflow_msg(__func__, xdr);
3472 return -EIO;
3340} 3473}
3341 3474
3342static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) 3475static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
3343{ 3476{
3344 __be32 *p; 3477 __be32 *p;
3478
3479 p = xdr_inline_decode(xdr, len);
3480 if (likely(p)) {
3481 memcpy(buf, p, len);
3482 return 0;
3483 }
3484 print_overflow_msg(__func__, xdr);
3485 return -EIO;
3486}
3487
3488static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
3489{
3490 return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
3491}
3492
3493static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
3494{
3345 int status; 3495 int status;
3346 3496
3347 status = decode_op_hdr(xdr, OP_CLOSE); 3497 status = decode_op_hdr(xdr, OP_CLOSE);
3348 if (status != -EIO) 3498 if (status != -EIO)
3349 nfs_increment_open_seqid(status, res->seqid); 3499 nfs_increment_open_seqid(status, res->seqid);
3350 if (status) 3500 if (!status)
3351 return status; 3501 status = decode_stateid(xdr, &res->stateid);
3352 READ_BUF(NFS4_STATEID_SIZE); 3502 return status;
3353 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3503}
3354 return 0; 3504
3505static int decode_verifier(struct xdr_stream *xdr, void *verifier)
3506{
3507 return decode_opaque_fixed(xdr, verifier, 8);
3355} 3508}
3356 3509
3357static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) 3510static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
3358{ 3511{
3359 __be32 *p;
3360 int status; 3512 int status;
3361 3513
3362 status = decode_op_hdr(xdr, OP_COMMIT); 3514 status = decode_op_hdr(xdr, OP_COMMIT);
3363 if (status) 3515 if (!status)
3364 return status; 3516 status = decode_verifier(xdr, res->verf->verifier);
3365 READ_BUF(8); 3517 return status;
3366 COPYMEM(res->verf->verifier, 8);
3367 return 0;
3368} 3518}
3369 3519
3370static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 3520static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3378,10 +3528,16 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
3378 return status; 3528 return status;
3379 if ((status = decode_change_info(xdr, cinfo))) 3529 if ((status = decode_change_info(xdr, cinfo)))
3380 return status; 3530 return status;
3381 READ_BUF(4); 3531 p = xdr_inline_decode(xdr, 4);
3382 READ32(bmlen); 3532 if (unlikely(!p))
3383 READ_BUF(bmlen << 2); 3533 goto out_overflow;
3384 return 0; 3534 bmlen = be32_to_cpup(p);
3535 p = xdr_inline_decode(xdr, bmlen << 2);
3536 if (likely(p))
3537 return 0;
3538out_overflow:
3539 print_overflow_msg(__func__, xdr);
3540 return -EIO;
3385} 3541}
3386 3542
3387static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) 3543static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
@@ -3466,7 +3622,8 @@ xdr_error:
3466 return status; 3622 return status;
3467} 3623}
3468 3624
3469static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) 3625static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3626 const struct nfs_server *server, int may_sleep)
3470{ 3627{
3471 __be32 *savep; 3628 __be32 *savep;
3472 uint32_t attrlen, 3629 uint32_t attrlen,
@@ -3538,12 +3695,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
3538 goto xdr_error; 3695 goto xdr_error;
3539 fattr->valid |= status; 3696 fattr->valid |= status;
3540 3697
3541 status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid); 3698 status = decode_attr_owner(xdr, bitmap, server->nfs_client,
3699 &fattr->uid, may_sleep);
3542 if (status < 0) 3700 if (status < 0)
3543 goto xdr_error; 3701 goto xdr_error;
3544 fattr->valid |= status; 3702 fattr->valid |= status;
3545 3703
3546 status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid); 3704 status = decode_attr_group(xdr, bitmap, server->nfs_client,
3705 &fattr->gid, may_sleep);
3547 if (status < 0) 3706 if (status < 0)
3548 goto xdr_error; 3707 goto xdr_error;
3549 fattr->valid |= status; 3708 fattr->valid |= status;
@@ -3633,14 +3792,21 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
3633 if (status) 3792 if (status)
3634 return status; 3793 return status;
3635 3794
3636 READ_BUF(4); 3795 p = xdr_inline_decode(xdr, 4);
3637 READ32(len); 3796 if (unlikely(!p))
3797 goto out_overflow;
3798 len = be32_to_cpup(p);
3638 if (len > NFS4_FHSIZE) 3799 if (len > NFS4_FHSIZE)
3639 return -EIO; 3800 return -EIO;
3640 fh->size = len; 3801 fh->size = len;
3641 READ_BUF(len); 3802 p = xdr_inline_decode(xdr, len);
3642 COPYMEM(fh->data, len); 3803 if (unlikely(!p))
3804 goto out_overflow;
3805 memcpy(fh->data, p, len);
3643 return 0; 3806 return 0;
3807out_overflow:
3808 print_overflow_msg(__func__, xdr);
3809 return -EIO;
3644} 3810}
3645 3811
3646static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 3812static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3662,10 +3828,12 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3662 __be32 *p; 3828 __be32 *p;
3663 uint32_t namelen, type; 3829 uint32_t namelen, type;
3664 3830
3665 READ_BUF(32); 3831 p = xdr_inline_decode(xdr, 32);
3666 READ64(offset); 3832 if (unlikely(!p))
3667 READ64(length); 3833 goto out_overflow;
3668 READ32(type); 3834 p = xdr_decode_hyper(p, &offset);
3835 p = xdr_decode_hyper(p, &length);
3836 type = be32_to_cpup(p++);
3669 if (fl != NULL) { 3837 if (fl != NULL) {
3670 fl->fl_start = (loff_t)offset; 3838 fl->fl_start = (loff_t)offset;
3671 fl->fl_end = fl->fl_start + (loff_t)length - 1; 3839 fl->fl_end = fl->fl_start + (loff_t)length - 1;
@@ -3676,23 +3844,27 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3676 fl->fl_type = F_RDLCK; 3844 fl->fl_type = F_RDLCK;
3677 fl->fl_pid = 0; 3845 fl->fl_pid = 0;
3678 } 3846 }
3679 READ64(clientid); 3847 p = xdr_decode_hyper(p, &clientid);
3680 READ32(namelen); 3848 namelen = be32_to_cpup(p);
3681 READ_BUF(namelen); 3849 p = xdr_inline_decode(xdr, namelen);
3682 return -NFS4ERR_DENIED; 3850 if (likely(p))
3851 return -NFS4ERR_DENIED;
3852out_overflow:
3853 print_overflow_msg(__func__, xdr);
3854 return -EIO;
3683} 3855}
3684 3856
3685static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) 3857static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
3686{ 3858{
3687 __be32 *p;
3688 int status; 3859 int status;
3689 3860
3690 status = decode_op_hdr(xdr, OP_LOCK); 3861 status = decode_op_hdr(xdr, OP_LOCK);
3691 if (status == -EIO) 3862 if (status == -EIO)
3692 goto out; 3863 goto out;
3693 if (status == 0) { 3864 if (status == 0) {
3694 READ_BUF(NFS4_STATEID_SIZE); 3865 status = decode_stateid(xdr, &res->stateid);
3695 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3866 if (unlikely(status))
3867 goto out;
3696 } else if (status == -NFS4ERR_DENIED) 3868 } else if (status == -NFS4ERR_DENIED)
3697 status = decode_lock_denied(xdr, NULL); 3869 status = decode_lock_denied(xdr, NULL);
3698 if (res->open_seqid != NULL) 3870 if (res->open_seqid != NULL)
@@ -3713,16 +3885,13 @@ static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res)
3713 3885
3714static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) 3886static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
3715{ 3887{
3716 __be32 *p;
3717 int status; 3888 int status;
3718 3889
3719 status = decode_op_hdr(xdr, OP_LOCKU); 3890 status = decode_op_hdr(xdr, OP_LOCKU);
3720 if (status != -EIO) 3891 if (status != -EIO)
3721 nfs_increment_lock_seqid(status, res->seqid); 3892 nfs_increment_lock_seqid(status, res->seqid);
3722 if (status == 0) { 3893 if (status == 0)
3723 READ_BUF(NFS4_STATEID_SIZE); 3894 status = decode_stateid(xdr, &res->stateid);
3724 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3725 }
3726 return status; 3895 return status;
3727} 3896}
3728 3897
@@ -3737,34 +3906,46 @@ static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
3737 __be32 *p; 3906 __be32 *p;
3738 uint32_t limit_type, nblocks, blocksize; 3907 uint32_t limit_type, nblocks, blocksize;
3739 3908
3740 READ_BUF(12); 3909 p = xdr_inline_decode(xdr, 12);
3741 READ32(limit_type); 3910 if (unlikely(!p))
3911 goto out_overflow;
3912 limit_type = be32_to_cpup(p++);
3742 switch (limit_type) { 3913 switch (limit_type) {
3743 case 1: 3914 case 1:
3744 READ64(*maxsize); 3915 xdr_decode_hyper(p, maxsize);
3745 break; 3916 break;
3746 case 2: 3917 case 2:
3747 READ32(nblocks); 3918 nblocks = be32_to_cpup(p++);
3748 READ32(blocksize); 3919 blocksize = be32_to_cpup(p);
3749 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; 3920 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
3750 } 3921 }
3751 return 0; 3922 return 0;
3923out_overflow:
3924 print_overflow_msg(__func__, xdr);
3925 return -EIO;
3752} 3926}
3753 3927
3754static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) 3928static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3755{ 3929{
3756 __be32 *p; 3930 __be32 *p;
3757 uint32_t delegation_type; 3931 uint32_t delegation_type;
3932 int status;
3758 3933
3759 READ_BUF(4); 3934 p = xdr_inline_decode(xdr, 4);
3760 READ32(delegation_type); 3935 if (unlikely(!p))
3936 goto out_overflow;
3937 delegation_type = be32_to_cpup(p);
3761 if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { 3938 if (delegation_type == NFS4_OPEN_DELEGATE_NONE) {
3762 res->delegation_type = 0; 3939 res->delegation_type = 0;
3763 return 0; 3940 return 0;
3764 } 3941 }
3765 READ_BUF(NFS4_STATEID_SIZE+4); 3942 status = decode_stateid(xdr, &res->delegation);
3766 COPYMEM(res->delegation.data, NFS4_STATEID_SIZE); 3943 if (unlikely(status))
3767 READ32(res->do_recall); 3944 return status;
3945 p = xdr_inline_decode(xdr, 4);
3946 if (unlikely(!p))
3947 goto out_overflow;
3948 res->do_recall = be32_to_cpup(p);
3768 3949
3769 switch (delegation_type) { 3950 switch (delegation_type) {
3770 case NFS4_OPEN_DELEGATE_READ: 3951 case NFS4_OPEN_DELEGATE_READ:
@@ -3776,6 +3957,9 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3776 return -EIO; 3957 return -EIO;
3777 } 3958 }
3778 return decode_ace(xdr, NULL, res->server->nfs_client); 3959 return decode_ace(xdr, NULL, res->server->nfs_client);
3960out_overflow:
3961 print_overflow_msg(__func__, xdr);
3962 return -EIO;
3779} 3963}
3780 3964
3781static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3965static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3787,23 +3971,27 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3787 status = decode_op_hdr(xdr, OP_OPEN); 3971 status = decode_op_hdr(xdr, OP_OPEN);
3788 if (status != -EIO) 3972 if (status != -EIO)
3789 nfs_increment_open_seqid(status, res->seqid); 3973 nfs_increment_open_seqid(status, res->seqid);
3790 if (status) 3974 if (!status)
3975 status = decode_stateid(xdr, &res->stateid);
3976 if (unlikely(status))
3791 return status; 3977 return status;
3792 READ_BUF(NFS4_STATEID_SIZE);
3793 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3794 3978
3795 decode_change_info(xdr, &res->cinfo); 3979 decode_change_info(xdr, &res->cinfo);
3796 3980
3797 READ_BUF(8); 3981 p = xdr_inline_decode(xdr, 8);
3798 READ32(res->rflags); 3982 if (unlikely(!p))
3799 READ32(bmlen); 3983 goto out_overflow;
3984 res->rflags = be32_to_cpup(p++);
3985 bmlen = be32_to_cpup(p);
3800 if (bmlen > 10) 3986 if (bmlen > 10)
3801 goto xdr_error; 3987 goto xdr_error;
3802 3988
3803 READ_BUF(bmlen << 2); 3989 p = xdr_inline_decode(xdr, bmlen << 2);
3990 if (unlikely(!p))
3991 goto out_overflow;
3804 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); 3992 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
3805 for (i = 0; i < savewords; ++i) 3993 for (i = 0; i < savewords; ++i)
3806 READ32(res->attrset[i]); 3994 res->attrset[i] = be32_to_cpup(p++);
3807 for (; i < NFS4_BITMAP_SIZE; i++) 3995 for (; i < NFS4_BITMAP_SIZE; i++)
3808 res->attrset[i] = 0; 3996 res->attrset[i] = 0;
3809 3997
@@ -3811,36 +3999,33 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3811xdr_error: 3999xdr_error:
3812 dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); 4000 dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
3813 return -EIO; 4001 return -EIO;
4002out_overflow:
4003 print_overflow_msg(__func__, xdr);
4004 return -EIO;
3814} 4005}
3815 4006
3816static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) 4007static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
3817{ 4008{
3818 __be32 *p;
3819 int status; 4009 int status;
3820 4010
3821 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); 4011 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
3822 if (status != -EIO) 4012 if (status != -EIO)
3823 nfs_increment_open_seqid(status, res->seqid); 4013 nfs_increment_open_seqid(status, res->seqid);
3824 if (status) 4014 if (!status)
3825 return status; 4015 status = decode_stateid(xdr, &res->stateid);
3826 READ_BUF(NFS4_STATEID_SIZE); 4016 return status;
3827 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3828 return 0;
3829} 4017}
3830 4018
3831static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) 4019static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
3832{ 4020{
3833 __be32 *p;
3834 int status; 4021 int status;
3835 4022
3836 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); 4023 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
3837 if (status != -EIO) 4024 if (status != -EIO)
3838 nfs_increment_open_seqid(status, res->seqid); 4025 nfs_increment_open_seqid(status, res->seqid);
3839 if (status) 4026 if (!status)
3840 return status; 4027 status = decode_stateid(xdr, &res->stateid);
3841 READ_BUF(NFS4_STATEID_SIZE); 4028 return status;
3842 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3843 return 0;
3844} 4029}
3845 4030
3846static int decode_putfh(struct xdr_stream *xdr) 4031static int decode_putfh(struct xdr_stream *xdr)
@@ -3863,9 +4048,11 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
3863 status = decode_op_hdr(xdr, OP_READ); 4048 status = decode_op_hdr(xdr, OP_READ);
3864 if (status) 4049 if (status)
3865 return status; 4050 return status;
3866 READ_BUF(8); 4051 p = xdr_inline_decode(xdr, 8);
3867 READ32(eof); 4052 if (unlikely(!p))
3868 READ32(count); 4053 goto out_overflow;
4054 eof = be32_to_cpup(p++);
4055 count = be32_to_cpup(p);
3869 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 4056 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
3870 recvd = req->rq_rcv_buf.len - hdrlen; 4057 recvd = req->rq_rcv_buf.len - hdrlen;
3871 if (count > recvd) { 4058 if (count > recvd) {
@@ -3878,6 +4065,9 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
3878 res->eof = eof; 4065 res->eof = eof;
3879 res->count = count; 4066 res->count = count;
3880 return 0; 4067 return 0;
4068out_overflow:
4069 print_overflow_msg(__func__, xdr);
4070 return -EIO;
3881} 4071}
3882 4072
3883static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) 4073static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
@@ -3892,17 +4082,17 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3892 int status; 4082 int status;
3893 4083
3894 status = decode_op_hdr(xdr, OP_READDIR); 4084 status = decode_op_hdr(xdr, OP_READDIR);
3895 if (status) 4085 if (!status)
4086 status = decode_verifier(xdr, readdir->verifier.data);
4087 if (unlikely(status))
3896 return status; 4088 return status;
3897 READ_BUF(8);
3898 COPYMEM(readdir->verifier.data, 8);
3899 dprintk("%s: verifier = %08x:%08x\n", 4089 dprintk("%s: verifier = %08x:%08x\n",
3900 __func__, 4090 __func__,
3901 ((u32 *)readdir->verifier.data)[0], 4091 ((u32 *)readdir->verifier.data)[0],
3902 ((u32 *)readdir->verifier.data)[1]); 4092 ((u32 *)readdir->verifier.data)[1]);
3903 4093
3904 4094
3905 hdrlen = (char *) p - (char *) iov->iov_base; 4095 hdrlen = (char *) xdr->p - (char *) iov->iov_base;
3906 recvd = rcvbuf->len - hdrlen; 4096 recvd = rcvbuf->len - hdrlen;
3907 if (pglen > recvd) 4097 if (pglen > recvd)
3908 pglen = recvd; 4098 pglen = recvd;
@@ -3990,8 +4180,10 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
3990 return status; 4180 return status;
3991 4181
3992 /* Convert length of symlink */ 4182 /* Convert length of symlink */
3993 READ_BUF(4); 4183 p = xdr_inline_decode(xdr, 4);
3994 READ32(len); 4184 if (unlikely(!p))
4185 goto out_overflow;
4186 len = be32_to_cpup(p);
3995 if (len >= rcvbuf->page_len || len <= 0) { 4187 if (len >= rcvbuf->page_len || len <= 0) {
3996 dprintk("nfs: server returned giant symlink!\n"); 4188 dprintk("nfs: server returned giant symlink!\n");
3997 return -ENAMETOOLONG; 4189 return -ENAMETOOLONG;
@@ -4015,6 +4207,9 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4015 kaddr[len+rcvbuf->page_base] = '\0'; 4207 kaddr[len+rcvbuf->page_base] = '\0';
4016 kunmap_atomic(kaddr, KM_USER0); 4208 kunmap_atomic(kaddr, KM_USER0);
4017 return 0; 4209 return 0;
4210out_overflow:
4211 print_overflow_msg(__func__, xdr);
4212 return -EIO;
4018} 4213}
4019 4214
4020static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 4215static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -4112,10 +4307,16 @@ static int decode_setattr(struct xdr_stream *xdr)
4112 status = decode_op_hdr(xdr, OP_SETATTR); 4307 status = decode_op_hdr(xdr, OP_SETATTR);
4113 if (status) 4308 if (status)
4114 return status; 4309 return status;
4115 READ_BUF(4); 4310 p = xdr_inline_decode(xdr, 4);
4116 READ32(bmlen); 4311 if (unlikely(!p))
4117 READ_BUF(bmlen << 2); 4312 goto out_overflow;
4118 return 0; 4313 bmlen = be32_to_cpup(p);
4314 p = xdr_inline_decode(xdr, bmlen << 2);
4315 if (likely(p))
4316 return 0;
4317out_overflow:
4318 print_overflow_msg(__func__, xdr);
4319 return -EIO;
4119} 4320}
4120 4321
4121static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) 4322static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
@@ -4124,35 +4325,50 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
4124 uint32_t opnum; 4325 uint32_t opnum;
4125 int32_t nfserr; 4326 int32_t nfserr;
4126 4327
4127 READ_BUF(8); 4328 p = xdr_inline_decode(xdr, 8);
4128 READ32(opnum); 4329 if (unlikely(!p))
4330 goto out_overflow;
4331 opnum = be32_to_cpup(p++);
4129 if (opnum != OP_SETCLIENTID) { 4332 if (opnum != OP_SETCLIENTID) {
4130 dprintk("nfs: decode_setclientid: Server returned operation" 4333 dprintk("nfs: decode_setclientid: Server returned operation"
4131 " %d\n", opnum); 4334 " %d\n", opnum);
4132 return -EIO; 4335 return -EIO;
4133 } 4336 }
4134 READ32(nfserr); 4337 nfserr = be32_to_cpup(p);
4135 if (nfserr == NFS_OK) { 4338 if (nfserr == NFS_OK) {
4136 READ_BUF(8 + NFS4_VERIFIER_SIZE); 4339 p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
4137 READ64(clp->cl_clientid); 4340 if (unlikely(!p))
4138 COPYMEM(clp->cl_confirm.data, NFS4_VERIFIER_SIZE); 4341 goto out_overflow;
4342 p = xdr_decode_hyper(p, &clp->cl_clientid);
4343 memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE);
4139 } else if (nfserr == NFSERR_CLID_INUSE) { 4344 } else if (nfserr == NFSERR_CLID_INUSE) {
4140 uint32_t len; 4345 uint32_t len;
4141 4346
4142 /* skip netid string */ 4347 /* skip netid string */
4143 READ_BUF(4); 4348 p = xdr_inline_decode(xdr, 4);
4144 READ32(len); 4349 if (unlikely(!p))
4145 READ_BUF(len); 4350 goto out_overflow;
4351 len = be32_to_cpup(p);
4352 p = xdr_inline_decode(xdr, len);
4353 if (unlikely(!p))
4354 goto out_overflow;
4146 4355
4147 /* skip uaddr string */ 4356 /* skip uaddr string */
4148 READ_BUF(4); 4357 p = xdr_inline_decode(xdr, 4);
4149 READ32(len); 4358 if (unlikely(!p))
4150 READ_BUF(len); 4359 goto out_overflow;
4360 len = be32_to_cpup(p);
4361 p = xdr_inline_decode(xdr, len);
4362 if (unlikely(!p))
4363 goto out_overflow;
4151 return -NFSERR_CLID_INUSE; 4364 return -NFSERR_CLID_INUSE;
4152 } else 4365 } else
4153 return nfs4_stat_to_errno(nfserr); 4366 return nfs4_stat_to_errno(nfserr);
4154 4367
4155 return 0; 4368 return 0;
4369out_overflow:
4370 print_overflow_msg(__func__, xdr);
4371 return -EIO;
4156} 4372}
4157 4373
4158static int decode_setclientid_confirm(struct xdr_stream *xdr) 4374static int decode_setclientid_confirm(struct xdr_stream *xdr)
@@ -4169,11 +4385,16 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
4169 if (status) 4385 if (status)
4170 return status; 4386 return status;
4171 4387
4172 READ_BUF(16); 4388 p = xdr_inline_decode(xdr, 16);
4173 READ32(res->count); 4389 if (unlikely(!p))
4174 READ32(res->verf->committed); 4390 goto out_overflow;
4175 COPYMEM(res->verf->verifier, 8); 4391 res->count = be32_to_cpup(p++);
4392 res->verf->committed = be32_to_cpup(p++);
4393 memcpy(res->verf->verifier, p, 8);
4176 return 0; 4394 return 0;
4395out_overflow:
4396 print_overflow_msg(__func__, xdr);
4397 return -EIO;
4177} 4398}
4178 4399
4179static int decode_delegreturn(struct xdr_stream *xdr) 4400static int decode_delegreturn(struct xdr_stream *xdr)
@@ -4187,6 +4408,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4187{ 4408{
4188 __be32 *p; 4409 __be32 *p;
4189 uint32_t dummy; 4410 uint32_t dummy;
4411 char *dummy_str;
4190 int status; 4412 int status;
4191 struct nfs_client *clp = res->client; 4413 struct nfs_client *clp = res->client;
4192 4414
@@ -4194,36 +4416,45 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4194 if (status) 4416 if (status)
4195 return status; 4417 return status;
4196 4418
4197 READ_BUF(8); 4419 p = xdr_inline_decode(xdr, 8);
4198 READ64(clp->cl_ex_clid); 4420 if (unlikely(!p))
4199 READ_BUF(12); 4421 goto out_overflow;
4200 READ32(clp->cl_seqid); 4422 xdr_decode_hyper(p, &clp->cl_ex_clid);
4201 READ32(clp->cl_exchange_flags); 4423 p = xdr_inline_decode(xdr, 12);
4424 if (unlikely(!p))
4425 goto out_overflow;
4426 clp->cl_seqid = be32_to_cpup(p++);
4427 clp->cl_exchange_flags = be32_to_cpup(p++);
4202 4428
4203 /* We ask for SP4_NONE */ 4429 /* We ask for SP4_NONE */
4204 READ32(dummy); 4430 dummy = be32_to_cpup(p);
4205 if (dummy != SP4_NONE) 4431 if (dummy != SP4_NONE)
4206 return -EIO; 4432 return -EIO;
4207 4433
4208 /* Throw away minor_id */ 4434 /* Throw away minor_id */
4209 READ_BUF(8); 4435 p = xdr_inline_decode(xdr, 8);
4436 if (unlikely(!p))
4437 goto out_overflow;
4210 4438
4211 /* Throw away Major id */ 4439 /* Throw away Major id */
4212 READ_BUF(4); 4440 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4213 READ32(dummy); 4441 if (unlikely(status))
4214 READ_BUF(dummy); 4442 return status;
4215 4443
4216 /* Throw away server_scope */ 4444 /* Throw away server_scope */
4217 READ_BUF(4); 4445 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4218 READ32(dummy); 4446 if (unlikely(status))
4219 READ_BUF(dummy); 4447 return status;
4220 4448
4221 /* Throw away Implementation id array */ 4449 /* Throw away Implementation id array */
4222 READ_BUF(4); 4450 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4223 READ32(dummy); 4451 if (unlikely(status))
4224 READ_BUF(dummy); 4452 return status;
4225 4453
4226 return 0; 4454 return 0;
4455out_overflow:
4456 print_overflow_msg(__func__, xdr);
4457 return -EIO;
4227} 4458}
4228 4459
4229static int decode_chan_attrs(struct xdr_stream *xdr, 4460static int decode_chan_attrs(struct xdr_stream *xdr,
@@ -4232,22 +4463,35 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
4232 __be32 *p; 4463 __be32 *p;
4233 u32 nr_attrs; 4464 u32 nr_attrs;
4234 4465
4235 READ_BUF(28); 4466 p = xdr_inline_decode(xdr, 28);
4236 READ32(attrs->headerpadsz); 4467 if (unlikely(!p))
4237 READ32(attrs->max_rqst_sz); 4468 goto out_overflow;
4238 READ32(attrs->max_resp_sz); 4469 attrs->headerpadsz = be32_to_cpup(p++);
4239 READ32(attrs->max_resp_sz_cached); 4470 attrs->max_rqst_sz = be32_to_cpup(p++);
4240 READ32(attrs->max_ops); 4471 attrs->max_resp_sz = be32_to_cpup(p++);
4241 READ32(attrs->max_reqs); 4472 attrs->max_resp_sz_cached = be32_to_cpup(p++);
4242 READ32(nr_attrs); 4473 attrs->max_ops = be32_to_cpup(p++);
4474 attrs->max_reqs = be32_to_cpup(p++);
4475 nr_attrs = be32_to_cpup(p);
4243 if (unlikely(nr_attrs > 1)) { 4476 if (unlikely(nr_attrs > 1)) {
4244 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", 4477 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
4245 __func__, nr_attrs); 4478 __func__, nr_attrs);
4246 return -EINVAL; 4479 return -EINVAL;
4247 } 4480 }
4248 if (nr_attrs == 1) 4481 if (nr_attrs == 1) {
4249 READ_BUF(4); /* skip rdma_attrs */ 4482 p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */
4483 if (unlikely(!p))
4484 goto out_overflow;
4485 }
4250 return 0; 4486 return 0;
4487out_overflow:
4488 print_overflow_msg(__func__, xdr);
4489 return -EIO;
4490}
4491
4492static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
4493{
4494 return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
4251} 4495}
4252 4496
4253static int decode_create_session(struct xdr_stream *xdr, 4497static int decode_create_session(struct xdr_stream *xdr,
@@ -4259,24 +4503,26 @@ static int decode_create_session(struct xdr_stream *xdr,
4259 struct nfs4_session *session = clp->cl_session; 4503 struct nfs4_session *session = clp->cl_session;
4260 4504
4261 status = decode_op_hdr(xdr, OP_CREATE_SESSION); 4505 status = decode_op_hdr(xdr, OP_CREATE_SESSION);
4262 4506 if (!status)
4263 if (status) 4507 status = decode_sessionid(xdr, &session->sess_id);
4508 if (unlikely(status))
4264 return status; 4509 return status;
4265 4510
4266 /* sessionid */
4267 READ_BUF(NFS4_MAX_SESSIONID_LEN);
4268 COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN);
4269
4270 /* seqid, flags */ 4511 /* seqid, flags */
4271 READ_BUF(8); 4512 p = xdr_inline_decode(xdr, 8);
4272 READ32(clp->cl_seqid); 4513 if (unlikely(!p))
4273 READ32(session->flags); 4514 goto out_overflow;
4515 clp->cl_seqid = be32_to_cpup(p++);
4516 session->flags = be32_to_cpup(p);
4274 4517
4275 /* Channel attributes */ 4518 /* Channel attributes */
4276 status = decode_chan_attrs(xdr, &session->fc_attrs); 4519 status = decode_chan_attrs(xdr, &session->fc_attrs);
4277 if (!status) 4520 if (!status)
4278 status = decode_chan_attrs(xdr, &session->bc_attrs); 4521 status = decode_chan_attrs(xdr, &session->bc_attrs);
4279 return status; 4522 return status;
4523out_overflow:
4524 print_overflow_msg(__func__, xdr);
4525 return -EIO;
4280} 4526}
4281 4527
4282static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) 4528static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
@@ -4300,7 +4546,9 @@ static int decode_sequence(struct xdr_stream *xdr,
4300 return 0; 4546 return 0;
4301 4547
4302 status = decode_op_hdr(xdr, OP_SEQUENCE); 4548 status = decode_op_hdr(xdr, OP_SEQUENCE);
4303 if (status) 4549 if (!status)
4550 status = decode_sessionid(xdr, &id);
4551 if (unlikely(status))
4304 goto out_err; 4552 goto out_err;
4305 4553
4306 /* 4554 /*
@@ -4309,36 +4557,43 @@ static int decode_sequence(struct xdr_stream *xdr,
4309 */ 4557 */
4310 status = -ESERVERFAULT; 4558 status = -ESERVERFAULT;
4311 4559
4312 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4313 READ_BUF(NFS4_MAX_SESSIONID_LEN + 20);
4314 COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN);
4315 if (memcmp(id.data, res->sr_session->sess_id.data, 4560 if (memcmp(id.data, res->sr_session->sess_id.data,
4316 NFS4_MAX_SESSIONID_LEN)) { 4561 NFS4_MAX_SESSIONID_LEN)) {
4317 dprintk("%s Invalid session id\n", __func__); 4562 dprintk("%s Invalid session id\n", __func__);
4318 goto out_err; 4563 goto out_err;
4319 } 4564 }
4565
4566 p = xdr_inline_decode(xdr, 20);
4567 if (unlikely(!p))
4568 goto out_overflow;
4569
4320 /* seqid */ 4570 /* seqid */
4321 READ32(dummy); 4571 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4572 dummy = be32_to_cpup(p++);
4322 if (dummy != slot->seq_nr) { 4573 if (dummy != slot->seq_nr) {
4323 dprintk("%s Invalid sequence number\n", __func__); 4574 dprintk("%s Invalid sequence number\n", __func__);
4324 goto out_err; 4575 goto out_err;
4325 } 4576 }
4326 /* slot id */ 4577 /* slot id */
4327 READ32(dummy); 4578 dummy = be32_to_cpup(p++);
4328 if (dummy != res->sr_slotid) { 4579 if (dummy != res->sr_slotid) {
4329 dprintk("%s Invalid slot id\n", __func__); 4580 dprintk("%s Invalid slot id\n", __func__);
4330 goto out_err; 4581 goto out_err;
4331 } 4582 }
4332 /* highest slot id - currently not processed */ 4583 /* highest slot id - currently not processed */
4333 READ32(dummy); 4584 dummy = be32_to_cpup(p++);
4334 /* target highest slot id - currently not processed */ 4585 /* target highest slot id - currently not processed */
4335 READ32(dummy); 4586 dummy = be32_to_cpup(p++);
4336 /* result flags - currently not processed */ 4587 /* result flags - currently not processed */
4337 READ32(dummy); 4588 dummy = be32_to_cpup(p);
4338 status = 0; 4589 status = 0;
4339out_err: 4590out_err:
4340 res->sr_status = status; 4591 res->sr_status = status;
4341 return status; 4592 return status;
4593out_overflow:
4594 print_overflow_msg(__func__, xdr);
4595 status = -EIO;
4596 goto out_err;
4342#else /* CONFIG_NFS_V4_1 */ 4597#else /* CONFIG_NFS_V4_1 */
4343 return 0; 4598 return 0;
4344#endif /* CONFIG_NFS_V4_1 */ 4599#endif /* CONFIG_NFS_V4_1 */
@@ -4370,7 +4625,8 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct
4370 status = decode_open_downgrade(&xdr, res); 4625 status = decode_open_downgrade(&xdr, res);
4371 if (status != 0) 4626 if (status != 0)
4372 goto out; 4627 goto out;
4373 decode_getfattr(&xdr, res->fattr, res->server); 4628 decode_getfattr(&xdr, res->fattr, res->server,
4629 !RPC_IS_ASYNC(rqstp->rq_task));
4374out: 4630out:
4375 return status; 4631 return status;
4376} 4632}
@@ -4397,7 +4653,8 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac
4397 status = decode_access(&xdr, res); 4653 status = decode_access(&xdr, res);
4398 if (status != 0) 4654 if (status != 0)
4399 goto out; 4655 goto out;
4400 decode_getfattr(&xdr, res->fattr, res->server); 4656 decode_getfattr(&xdr, res->fattr, res->server,
4657 !RPC_IS_ASYNC(rqstp->rq_task));
4401out: 4658out:
4402 return status; 4659 return status;
4403} 4660}
@@ -4424,7 +4681,8 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo
4424 goto out; 4681 goto out;
4425 if ((status = decode_getfh(&xdr, res->fh)) != 0) 4682 if ((status = decode_getfh(&xdr, res->fh)) != 0)
4426 goto out; 4683 goto out;
4427 status = decode_getfattr(&xdr, res->fattr, res->server); 4684 status = decode_getfattr(&xdr, res->fattr, res->server
4685 ,!RPC_IS_ASYNC(rqstp->rq_task));
4428out: 4686out:
4429 return status; 4687 return status;
4430} 4688}
@@ -4448,7 +4706,8 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf
4448 if ((status = decode_putrootfh(&xdr)) != 0) 4706 if ((status = decode_putrootfh(&xdr)) != 0)
4449 goto out; 4707 goto out;
4450 if ((status = decode_getfh(&xdr, res->fh)) == 0) 4708 if ((status = decode_getfh(&xdr, res->fh)) == 0)
4451 status = decode_getfattr(&xdr, res->fattr, res->server); 4709 status = decode_getfattr(&xdr, res->fattr, res->server,
4710 !RPC_IS_ASYNC(rqstp->rq_task));
4452out: 4711out:
4453 return status; 4712 return status;
4454} 4713}
@@ -4473,7 +4732,8 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
4473 goto out; 4732 goto out;
4474 if ((status = decode_remove(&xdr, &res->cinfo)) != 0) 4733 if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
4475 goto out; 4734 goto out;
4476 decode_getfattr(&xdr, &res->dir_attr, res->server); 4735 decode_getfattr(&xdr, &res->dir_attr, res->server,
4736 !RPC_IS_ASYNC(rqstp->rq_task));
4477out: 4737out:
4478 return status; 4738 return status;
4479} 4739}
@@ -4503,11 +4763,13 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re
4503 if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) 4763 if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
4504 goto out; 4764 goto out;
4505 /* Current FH is target directory */ 4765 /* Current FH is target directory */
4506 if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0) 4766 if (decode_getfattr(&xdr, res->new_fattr, res->server,
4767 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4507 goto out; 4768 goto out;
4508 if ((status = decode_restorefh(&xdr)) != 0) 4769 if ((status = decode_restorefh(&xdr)) != 0)
4509 goto out; 4770 goto out;
4510 decode_getfattr(&xdr, res->old_fattr, res->server); 4771 decode_getfattr(&xdr, res->old_fattr, res->server,
4772 !RPC_IS_ASYNC(rqstp->rq_task));
4511out: 4773out:
4512 return status; 4774 return status;
4513} 4775}
@@ -4540,11 +4802,13 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link
4540 * Note order: OP_LINK leaves the directory as the current 4802 * Note order: OP_LINK leaves the directory as the current
4541 * filehandle. 4803 * filehandle.
4542 */ 4804 */
4543 if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0) 4805 if (decode_getfattr(&xdr, res->dir_attr, res->server,
4806 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4544 goto out; 4807 goto out;
4545 if ((status = decode_restorefh(&xdr)) != 0) 4808 if ((status = decode_restorefh(&xdr)) != 0)
4546 goto out; 4809 goto out;
4547 decode_getfattr(&xdr, res->fattr, res->server); 4810 decode_getfattr(&xdr, res->fattr, res->server,
4811 !RPC_IS_ASYNC(rqstp->rq_task));
4548out: 4812out:
4549 return status; 4813 return status;
4550} 4814}
@@ -4573,11 +4837,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr
4573 goto out; 4837 goto out;
4574 if ((status = decode_getfh(&xdr, res->fh)) != 0) 4838 if ((status = decode_getfh(&xdr, res->fh)) != 0)
4575 goto out; 4839 goto out;
4576 if (decode_getfattr(&xdr, res->fattr, res->server) != 0) 4840 if (decode_getfattr(&xdr, res->fattr, res->server,
4841 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4577 goto out; 4842 goto out;
4578 if ((status = decode_restorefh(&xdr)) != 0) 4843 if ((status = decode_restorefh(&xdr)) != 0)
4579 goto out; 4844 goto out;
4580 decode_getfattr(&xdr, res->dir_fattr, res->server); 4845 decode_getfattr(&xdr, res->dir_fattr, res->server,
4846 !RPC_IS_ASYNC(rqstp->rq_task));
4581out: 4847out:
4582 return status; 4848 return status;
4583} 4849}
@@ -4609,7 +4875,8 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g
4609 status = decode_putfh(&xdr); 4875 status = decode_putfh(&xdr);
4610 if (status) 4876 if (status)
4611 goto out; 4877 goto out;
4612 status = decode_getfattr(&xdr, res->fattr, res->server); 4878 status = decode_getfattr(&xdr, res->fattr, res->server,
4879 !RPC_IS_ASYNC(rqstp->rq_task));
4613out: 4880out:
4614 return status; 4881 return status;
4615} 4882}
@@ -4716,7 +4983,8 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
4716 * an ESTALE error. Shouldn't be a problem, 4983 * an ESTALE error. Shouldn't be a problem,
4717 * though, since fattr->valid will remain unset. 4984 * though, since fattr->valid will remain unset.
4718 */ 4985 */
4719 decode_getfattr(&xdr, res->fattr, res->server); 4986 decode_getfattr(&xdr, res->fattr, res->server,
4987 !RPC_IS_ASYNC(rqstp->rq_task));
4720out: 4988out:
4721 return status; 4989 return status;
4722} 4990}
@@ -4748,11 +5016,13 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr
4748 goto out; 5016 goto out;
4749 if (decode_getfh(&xdr, &res->fh) != 0) 5017 if (decode_getfh(&xdr, &res->fh) != 0)
4750 goto out; 5018 goto out;
4751 if (decode_getfattr(&xdr, res->f_attr, res->server) != 0) 5019 if (decode_getfattr(&xdr, res->f_attr, res->server,
5020 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4752 goto out; 5021 goto out;
4753 if (decode_restorefh(&xdr) != 0) 5022 if (decode_restorefh(&xdr) != 0)
4754 goto out; 5023 goto out;
4755 decode_getfattr(&xdr, res->dir_attr, res->server); 5024 decode_getfattr(&xdr, res->dir_attr, res->server,
5025 !RPC_IS_ASYNC(rqstp->rq_task));
4756out: 5026out:
4757 return status; 5027 return status;
4758} 5028}
@@ -4800,7 +5070,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf
4800 status = decode_open(&xdr, res); 5070 status = decode_open(&xdr, res);
4801 if (status) 5071 if (status)
4802 goto out; 5072 goto out;
4803 decode_getfattr(&xdr, res->f_attr, res->server); 5073 decode_getfattr(&xdr, res->f_attr, res->server,
5074 !RPC_IS_ASYNC(rqstp->rq_task));
4804out: 5075out:
4805 return status; 5076 return status;
4806} 5077}
@@ -4827,7 +5098,8 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se
4827 status = decode_setattr(&xdr); 5098 status = decode_setattr(&xdr);
4828 if (status) 5099 if (status)
4829 goto out; 5100 goto out;
4830 decode_getfattr(&xdr, res->fattr, res->server); 5101 decode_getfattr(&xdr, res->fattr, res->server,
5102 !RPC_IS_ASYNC(rqstp->rq_task));
4831out: 5103out:
4832 return status; 5104 return status;
4833} 5105}
@@ -5001,7 +5273,8 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ
5001 status = decode_write(&xdr, res); 5273 status = decode_write(&xdr, res);
5002 if (status) 5274 if (status)
5003 goto out; 5275 goto out;
5004 decode_getfattr(&xdr, res->fattr, res->server); 5276 decode_getfattr(&xdr, res->fattr, res->server,
5277 !RPC_IS_ASYNC(rqstp->rq_task));
5005 if (!status) 5278 if (!status)
5006 status = res->count; 5279 status = res->count;
5007out: 5280out:
@@ -5030,7 +5303,8 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri
5030 status = decode_commit(&xdr, res); 5303 status = decode_commit(&xdr, res);
5031 if (status) 5304 if (status)
5032 goto out; 5305 goto out;
5033 decode_getfattr(&xdr, res->fattr, res->server); 5306 decode_getfattr(&xdr, res->fattr, res->server,
5307 !RPC_IS_ASYNC(rqstp->rq_task));
5034out: 5308out:
5035 return status; 5309 return status;
5036} 5310}
@@ -5194,7 +5468,8 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf
5194 if (status != 0) 5468 if (status != 0)
5195 goto out; 5469 goto out;
5196 status = decode_delegreturn(&xdr); 5470 status = decode_delegreturn(&xdr);
5197 decode_getfattr(&xdr, res->fattr, res->server); 5471 decode_getfattr(&xdr, res->fattr, res->server,
5472 !RPC_IS_ASYNC(rqstp->rq_task));
5198out: 5473out:
5199 return status; 5474 return status;
5200} 5475}
@@ -5222,7 +5497,8 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
5222 goto out; 5497 goto out;
5223 xdr_enter_page(&xdr, PAGE_SIZE); 5498 xdr_enter_page(&xdr, PAGE_SIZE);
5224 status = decode_getfattr(&xdr, &res->fs_locations->fattr, 5499 status = decode_getfattr(&xdr, &res->fs_locations->fattr,
5225 res->fs_locations->server); 5500 res->fs_locations->server,
5501 !RPC_IS_ASYNC(req->rq_task));
5226out: 5502out:
5227 return status; 5503 return status;
5228} 5504}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0b4cbdc60abd..867f70504531 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -73,7 +73,7 @@ enum {
73 Opt_cto, Opt_nocto, 73 Opt_cto, Opt_nocto,
74 Opt_ac, Opt_noac, 74 Opt_ac, Opt_noac,
75 Opt_lock, Opt_nolock, 75 Opt_lock, Opt_nolock,
76 Opt_v2, Opt_v3, 76 Opt_v2, Opt_v3, Opt_v4,
77 Opt_udp, Opt_tcp, Opt_rdma, 77 Opt_udp, Opt_tcp, Opt_rdma,
78 Opt_acl, Opt_noacl, 78 Opt_acl, Opt_noacl,
79 Opt_rdirplus, Opt_nordirplus, 79 Opt_rdirplus, Opt_nordirplus,
@@ -127,6 +127,7 @@ static const match_table_t nfs_mount_option_tokens = {
127 { Opt_nolock, "nolock" }, 127 { Opt_nolock, "nolock" },
128 { Opt_v2, "v2" }, 128 { Opt_v2, "v2" },
129 { Opt_v3, "v3" }, 129 { Opt_v3, "v3" },
130 { Opt_v4, "v4" },
130 { Opt_udp, "udp" }, 131 { Opt_udp, "udp" },
131 { Opt_tcp, "tcp" }, 132 { Opt_tcp, "tcp" },
132 { Opt_rdma, "rdma" }, 133 { Opt_rdma, "rdma" },
@@ -158,7 +159,7 @@ static const match_table_t nfs_mount_option_tokens = {
158 { Opt_mountvers, "mountvers=%s" }, 159 { Opt_mountvers, "mountvers=%s" },
159 { Opt_nfsvers, "nfsvers=%s" }, 160 { Opt_nfsvers, "nfsvers=%s" },
160 { Opt_nfsvers, "vers=%s" }, 161 { Opt_nfsvers, "vers=%s" },
161 { Opt_minorversion, "minorversion=%u" }, 162 { Opt_minorversion, "minorversion=%s" },
162 163
163 { Opt_sec, "sec=%s" }, 164 { Opt_sec, "sec=%s" },
164 { Opt_proto, "proto=%s" }, 165 { Opt_proto, "proto=%s" },
@@ -272,6 +273,10 @@ static const struct super_operations nfs_sops = {
272}; 273};
273 274
274#ifdef CONFIG_NFS_V4 275#ifdef CONFIG_NFS_V4
276static int nfs4_validate_text_mount_data(void *options,
277 struct nfs_parsed_mount_data *args, const char *dev_name);
278static int nfs4_try_mount(int flags, const char *dev_name,
279 struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
275static int nfs4_get_sb(struct file_system_type *fs_type, 280static int nfs4_get_sb(struct file_system_type *fs_type,
276 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 281 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
277static int nfs4_remote_get_sb(struct file_system_type *fs_type, 282static int nfs4_remote_get_sb(struct file_system_type *fs_type,
@@ -742,127 +747,23 @@ static int nfs_verify_server_address(struct sockaddr *addr)
742 } 747 }
743 } 748 }
744 749
750 dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
745 return 0; 751 return 0;
746} 752}
747 753
748static void nfs_parse_ipv4_address(char *string, size_t str_len,
749 struct sockaddr *sap, size_t *addr_len)
750{
751 struct sockaddr_in *sin = (struct sockaddr_in *)sap;
752 u8 *addr = (u8 *)&sin->sin_addr.s_addr;
753
754 if (str_len <= INET_ADDRSTRLEN) {
755 dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n",
756 (int)str_len, string);
757
758 sin->sin_family = AF_INET;
759 *addr_len = sizeof(*sin);
760 if (in4_pton(string, str_len, addr, '\0', NULL))
761 return;
762 }
763
764 sap->sa_family = AF_UNSPEC;
765 *addr_len = 0;
766}
767
768#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
769static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
770 const char *delim,
771 struct sockaddr_in6 *sin6)
772{
773 char *p;
774 size_t len;
775
776 if ((string + str_len) == delim)
777 return 1;
778
779 if (*delim != IPV6_SCOPE_DELIMITER)
780 return 0;
781
782 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
783 return 0;
784
785 len = (string + str_len) - delim - 1;
786 p = kstrndup(delim + 1, len, GFP_KERNEL);
787 if (p) {
788 unsigned long scope_id = 0;
789 struct net_device *dev;
790
791 dev = dev_get_by_name(&init_net, p);
792 if (dev != NULL) {
793 scope_id = dev->ifindex;
794 dev_put(dev);
795 } else {
796 if (strict_strtoul(p, 10, &scope_id) == 0) {
797 kfree(p);
798 return 0;
799 }
800 }
801
802 kfree(p);
803
804 sin6->sin6_scope_id = scope_id;
805 dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id);
806 return 1;
807 }
808
809 return 0;
810}
811
812static void nfs_parse_ipv6_address(char *string, size_t str_len,
813 struct sockaddr *sap, size_t *addr_len)
814{
815 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
816 u8 *addr = (u8 *)&sin6->sin6_addr.in6_u;
817 const char *delim;
818
819 if (str_len <= INET6_ADDRSTRLEN) {
820 dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n",
821 (int)str_len, string);
822
823 sin6->sin6_family = AF_INET6;
824 *addr_len = sizeof(*sin6);
825 if (in6_pton(string, str_len, addr,
826 IPV6_SCOPE_DELIMITER, &delim) != 0) {
827 if (nfs_parse_ipv6_scope_id(string, str_len,
828 delim, sin6) != 0)
829 return;
830 }
831 }
832
833 sap->sa_family = AF_UNSPEC;
834 *addr_len = 0;
835}
836#else
837static void nfs_parse_ipv6_address(char *string, size_t str_len,
838 struct sockaddr *sap, size_t *addr_len)
839{
840 sap->sa_family = AF_UNSPEC;
841 *addr_len = 0;
842}
843#endif
844
845/* 754/*
846 * Construct a sockaddr based on the contents of a string that contains 755 * Select between a default port value and a user-specified port value.
847 * an IP address in presentation format. 756 * If a zero value is set, then autobind will be used.
848 *
849 * If there is a problem constructing the new sockaddr, set the address
850 * family to AF_UNSPEC.
851 */ 757 */
852void nfs_parse_ip_address(char *string, size_t str_len, 758static void nfs_set_default_port(struct sockaddr *sap, const int parsed_port,
853 struct sockaddr *sap, size_t *addr_len) 759 const unsigned short default_port)
854{ 760{
855 unsigned int i, colons; 761 unsigned short port = default_port;
856 762
857 colons = 0; 763 if (parsed_port != NFS_UNSPEC_PORT)
858 for (i = 0; i < str_len; i++) 764 port = parsed_port;
859 if (string[i] == ':')
860 colons++;
861 765
862 if (colons >= 2) 766 rpc_set_port(sap, port);
863 nfs_parse_ipv6_address(string, str_len, sap, addr_len);
864 else
865 nfs_parse_ipv4_address(string, str_len, sap, addr_len);
866} 767}
867 768
868/* 769/*
@@ -904,8 +805,6 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
904 805
905/* 806/*
906 * Parse the value of the 'sec=' option. 807 * Parse the value of the 'sec=' option.
907 *
908 * The flavor_len setting is for v4 mounts.
909 */ 808 */
910static int nfs_parse_security_flavors(char *value, 809static int nfs_parse_security_flavors(char *value,
911 struct nfs_parsed_mount_data *mnt) 810 struct nfs_parsed_mount_data *mnt)
@@ -916,53 +815,43 @@ static int nfs_parse_security_flavors(char *value,
916 815
917 switch (match_token(value, nfs_secflavor_tokens, args)) { 816 switch (match_token(value, nfs_secflavor_tokens, args)) {
918 case Opt_sec_none: 817 case Opt_sec_none:
919 mnt->auth_flavor_len = 0;
920 mnt->auth_flavors[0] = RPC_AUTH_NULL; 818 mnt->auth_flavors[0] = RPC_AUTH_NULL;
921 break; 819 break;
922 case Opt_sec_sys: 820 case Opt_sec_sys:
923 mnt->auth_flavor_len = 0;
924 mnt->auth_flavors[0] = RPC_AUTH_UNIX; 821 mnt->auth_flavors[0] = RPC_AUTH_UNIX;
925 break; 822 break;
926 case Opt_sec_krb5: 823 case Opt_sec_krb5:
927 mnt->auth_flavor_len = 1;
928 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; 824 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
929 break; 825 break;
930 case Opt_sec_krb5i: 826 case Opt_sec_krb5i:
931 mnt->auth_flavor_len = 1;
932 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; 827 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
933 break; 828 break;
934 case Opt_sec_krb5p: 829 case Opt_sec_krb5p:
935 mnt->auth_flavor_len = 1;
936 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; 830 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
937 break; 831 break;
938 case Opt_sec_lkey: 832 case Opt_sec_lkey:
939 mnt->auth_flavor_len = 1;
940 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; 833 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
941 break; 834 break;
942 case Opt_sec_lkeyi: 835 case Opt_sec_lkeyi:
943 mnt->auth_flavor_len = 1;
944 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; 836 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
945 break; 837 break;
946 case Opt_sec_lkeyp: 838 case Opt_sec_lkeyp:
947 mnt->auth_flavor_len = 1;
948 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; 839 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
949 break; 840 break;
950 case Opt_sec_spkm: 841 case Opt_sec_spkm:
951 mnt->auth_flavor_len = 1;
952 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; 842 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
953 break; 843 break;
954 case Opt_sec_spkmi: 844 case Opt_sec_spkmi:
955 mnt->auth_flavor_len = 1;
956 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; 845 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
957 break; 846 break;
958 case Opt_sec_spkmp: 847 case Opt_sec_spkmp:
959 mnt->auth_flavor_len = 1;
960 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; 848 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
961 break; 849 break;
962 default: 850 default:
963 return 0; 851 return 0;
964 } 852 }
965 853
854 mnt->auth_flavor_len = 1;
966 return 1; 855 return 1;
967} 856}
968 857
@@ -1001,7 +890,6 @@ static int nfs_parse_mount_options(char *raw,
1001 while ((p = strsep(&raw, ",")) != NULL) { 890 while ((p = strsep(&raw, ",")) != NULL) {
1002 substring_t args[MAX_OPT_ARGS]; 891 substring_t args[MAX_OPT_ARGS];
1003 unsigned long option; 892 unsigned long option;
1004 int int_option;
1005 int token; 893 int token;
1006 894
1007 if (!*p) 895 if (!*p)
@@ -1047,10 +935,18 @@ static int nfs_parse_mount_options(char *raw,
1047 break; 935 break;
1048 case Opt_v2: 936 case Opt_v2:
1049 mnt->flags &= ~NFS_MOUNT_VER3; 937 mnt->flags &= ~NFS_MOUNT_VER3;
938 mnt->version = 2;
1050 break; 939 break;
1051 case Opt_v3: 940 case Opt_v3:
1052 mnt->flags |= NFS_MOUNT_VER3; 941 mnt->flags |= NFS_MOUNT_VER3;
942 mnt->version = 3;
1053 break; 943 break;
944#ifdef CONFIG_NFS_V4
945 case Opt_v4:
946 mnt->flags &= ~NFS_MOUNT_VER3;
947 mnt->version = 4;
948 break;
949#endif
1054 case Opt_udp: 950 case Opt_udp:
1055 mnt->flags &= ~NFS_MOUNT_TCP; 951 mnt->flags &= ~NFS_MOUNT_TCP;
1056 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 952 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
@@ -1264,20 +1160,33 @@ static int nfs_parse_mount_options(char *raw,
1264 switch (option) { 1160 switch (option) {
1265 case NFS2_VERSION: 1161 case NFS2_VERSION:
1266 mnt->flags &= ~NFS_MOUNT_VER3; 1162 mnt->flags &= ~NFS_MOUNT_VER3;
1163 mnt->version = 2;
1267 break; 1164 break;
1268 case NFS3_VERSION: 1165 case NFS3_VERSION:
1269 mnt->flags |= NFS_MOUNT_VER3; 1166 mnt->flags |= NFS_MOUNT_VER3;
1167 mnt->version = 3;
1270 break; 1168 break;
1169#ifdef CONFIG_NFS_V4
1170 case NFS4_VERSION:
1171 mnt->flags &= ~NFS_MOUNT_VER3;
1172 mnt->version = 4;
1173 break;
1174#endif
1271 default: 1175 default:
1272 goto out_invalid_value; 1176 goto out_invalid_value;
1273 } 1177 }
1274 break; 1178 break;
1275 case Opt_minorversion: 1179 case Opt_minorversion:
1276 if (match_int(args, &int_option)) 1180 string = match_strdup(args);
1277 return 0; 1181 if (string == NULL)
1278 if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION) 1182 goto out_nomem;
1279 return 0; 1183 rc = strict_strtoul(string, 10, &option);
1280 mnt->minorversion = int_option; 1184 kfree(string);
1185 if (rc != 0)
1186 goto out_invalid_value;
1187 if (option > NFS4_MAX_MINOR_VERSION)
1188 goto out_invalid_value;
1189 mnt->minorversion = option;
1281 break; 1190 break;
1282 1191
1283 /* 1192 /*
@@ -1352,11 +1261,14 @@ static int nfs_parse_mount_options(char *raw,
1352 string = match_strdup(args); 1261 string = match_strdup(args);
1353 if (string == NULL) 1262 if (string == NULL)
1354 goto out_nomem; 1263 goto out_nomem;
1355 nfs_parse_ip_address(string, strlen(string), 1264 mnt->nfs_server.addrlen =
1356 (struct sockaddr *) 1265 rpc_pton(string, strlen(string),
1357 &mnt->nfs_server.address, 1266 (struct sockaddr *)
1358 &mnt->nfs_server.addrlen); 1267 &mnt->nfs_server.address,
1268 sizeof(mnt->nfs_server.address));
1359 kfree(string); 1269 kfree(string);
1270 if (mnt->nfs_server.addrlen == 0)
1271 goto out_invalid_address;
1360 break; 1272 break;
1361 case Opt_clientaddr: 1273 case Opt_clientaddr:
1362 string = match_strdup(args); 1274 string = match_strdup(args);
@@ -1376,11 +1288,14 @@ static int nfs_parse_mount_options(char *raw,
1376 string = match_strdup(args); 1288 string = match_strdup(args);
1377 if (string == NULL) 1289 if (string == NULL)
1378 goto out_nomem; 1290 goto out_nomem;
1379 nfs_parse_ip_address(string, strlen(string), 1291 mnt->mount_server.addrlen =
1380 (struct sockaddr *) 1292 rpc_pton(string, strlen(string),
1381 &mnt->mount_server.address, 1293 (struct sockaddr *)
1382 &mnt->mount_server.addrlen); 1294 &mnt->mount_server.address,
1295 sizeof(mnt->mount_server.address));
1383 kfree(string); 1296 kfree(string);
1297 if (mnt->mount_server.addrlen == 0)
1298 goto out_invalid_address;
1384 break; 1299 break;
1385 case Opt_lookupcache: 1300 case Opt_lookupcache:
1386 string = match_strdup(args); 1301 string = match_strdup(args);
@@ -1432,8 +1347,11 @@ static int nfs_parse_mount_options(char *raw,
1432 1347
1433 return 1; 1348 return 1;
1434 1349
1350out_invalid_address:
1351 printk(KERN_INFO "NFS: bad IP address specified: %s\n", p);
1352 return 0;
1435out_invalid_value: 1353out_invalid_value:
1436 printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p); 1354 printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p);
1437 return 0; 1355 return 0;
1438out_nomem: 1356out_nomem:
1439 printk(KERN_INFO "NFS: not enough memory to parse option\n"); 1357 printk(KERN_INFO "NFS: not enough memory to parse option\n");
@@ -1445,13 +1363,60 @@ out_security_failure:
1445} 1363}
1446 1364
1447/* 1365/*
1366 * Match the requested auth flavors with the list returned by
1367 * the server. Returns zero and sets the mount's authentication
1368 * flavor on success; returns -EACCES if server does not support
1369 * the requested flavor.
1370 */
1371static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
1372 struct nfs_mount_request *request)
1373{
1374 unsigned int i, j, server_authlist_len = *(request->auth_flav_len);
1375
1376 /*
1377 * Certain releases of Linux's mountd return an empty
1378 * flavor list. To prevent behavioral regression with
1379 * these servers (ie. rejecting mounts that used to
1380 * succeed), revert to pre-2.6.32 behavior (no checking)
1381 * if the returned flavor list is empty.
1382 */
1383 if (server_authlist_len == 0)
1384 return 0;
1385
1386 /*
1387 * We avoid sophisticated negotiating here, as there are
1388 * plenty of cases where we can get it wrong, providing
1389 * either too little or too much security.
1390 *
1391 * RFC 2623, section 2.7 suggests we SHOULD prefer the
1392 * flavor listed first. However, some servers list
1393 * AUTH_NULL first. Our caller plants AUTH_SYS, the
1394 * preferred default, in args->auth_flavors[0] if user
1395 * didn't specify sec= mount option.
1396 */
1397 for (i = 0; i < args->auth_flavor_len; i++)
1398 for (j = 0; j < server_authlist_len; j++)
1399 if (args->auth_flavors[i] == request->auth_flavs[j]) {
1400 dfprintk(MOUNT, "NFS: using auth flavor %d\n",
1401 request->auth_flavs[j]);
1402 args->auth_flavors[0] = request->auth_flavs[j];
1403 return 0;
1404 }
1405
1406 dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n");
1407 nfs_umount(request);
1408 return -EACCES;
1409}
1410
1411/*
1448 * Use the remote server's MOUNT service to request the NFS file handle 1412 * Use the remote server's MOUNT service to request the NFS file handle
1449 * corresponding to the provided path. 1413 * corresponding to the provided path.
1450 */ 1414 */
1451static int nfs_try_mount(struct nfs_parsed_mount_data *args, 1415static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1452 struct nfs_fh *root_fh) 1416 struct nfs_fh *root_fh)
1453{ 1417{
1454 unsigned int auth_flavor_len = 0; 1418 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
1419 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
1455 struct nfs_mount_request request = { 1420 struct nfs_mount_request request = {
1456 .sap = (struct sockaddr *) 1421 .sap = (struct sockaddr *)
1457 &args->mount_server.address, 1422 &args->mount_server.address,
@@ -1459,7 +1424,8 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1459 .protocol = args->mount_server.protocol, 1424 .protocol = args->mount_server.protocol,
1460 .fh = root_fh, 1425 .fh = root_fh,
1461 .noresvport = args->flags & NFS_MOUNT_NORESVPORT, 1426 .noresvport = args->flags & NFS_MOUNT_NORESVPORT,
1462 .auth_flav_len = &auth_flavor_len, 1427 .auth_flav_len = &server_authlist_len,
1428 .auth_flavs = server_authlist,
1463 }; 1429 };
1464 int status; 1430 int status;
1465 1431
@@ -1485,23 +1451,25 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1485 args->mount_server.addrlen = args->nfs_server.addrlen; 1451 args->mount_server.addrlen = args->nfs_server.addrlen;
1486 } 1452 }
1487 request.salen = args->mount_server.addrlen; 1453 request.salen = args->mount_server.addrlen;
1488 1454 nfs_set_default_port(request.sap, args->mount_server.port, 0);
1489 /*
1490 * autobind will be used if mount_server.port == 0
1491 */
1492 nfs_set_port(request.sap, args->mount_server.port);
1493 1455
1494 /* 1456 /*
1495 * Now ask the mount server to map our export path 1457 * Now ask the mount server to map our export path
1496 * to a file handle. 1458 * to a file handle.
1497 */ 1459 */
1498 status = nfs_mount(&request); 1460 status = nfs_mount(&request);
1499 if (status == 0) 1461 if (status != 0) {
1500 return 0; 1462 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
1463 request.hostname, status);
1464 return status;
1465 }
1501 1466
1502 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", 1467 /*
1503 request.hostname, status); 1468 * MNTv1 (NFSv2) does not support auth flavor negotiation.
1504 return status; 1469 */
1470 if (args->mount_server.version != NFS_MNT3_VERSION)
1471 return 0;
1472 return nfs_walk_authlist(args, &request);
1505} 1473}
1506 1474
1507static int nfs_parse_simple_hostname(const char *dev_name, 1475static int nfs_parse_simple_hostname(const char *dev_name,
@@ -1661,6 +1629,7 @@ static int nfs_validate_mount_data(void *options,
1661 const char *dev_name) 1629 const char *dev_name)
1662{ 1630{
1663 struct nfs_mount_data *data = (struct nfs_mount_data *)options; 1631 struct nfs_mount_data *data = (struct nfs_mount_data *)options;
1632 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
1664 1633
1665 if (data == NULL) 1634 if (data == NULL)
1666 goto out_no_data; 1635 goto out_no_data;
@@ -1672,10 +1641,12 @@ static int nfs_validate_mount_data(void *options,
1672 args->acregmax = NFS_DEF_ACREGMAX; 1641 args->acregmax = NFS_DEF_ACREGMAX;
1673 args->acdirmin = NFS_DEF_ACDIRMIN; 1642 args->acdirmin = NFS_DEF_ACDIRMIN;
1674 args->acdirmax = NFS_DEF_ACDIRMAX; 1643 args->acdirmax = NFS_DEF_ACDIRMAX;
1675 args->mount_server.port = 0; /* autobind unless user sets port */ 1644 args->mount_server.port = NFS_UNSPEC_PORT;
1676 args->nfs_server.port = 0; /* autobind unless user sets port */ 1645 args->nfs_server.port = NFS_UNSPEC_PORT;
1677 args->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1646 args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1678 args->auth_flavors[0] = RPC_AUTH_UNIX; 1647 args->auth_flavors[0] = RPC_AUTH_UNIX;
1648 args->auth_flavor_len = 1;
1649 args->minorversion = 0;
1679 1650
1680 switch (data->version) { 1651 switch (data->version) {
1681 case 1: 1652 case 1:
@@ -1697,8 +1668,11 @@ static int nfs_validate_mount_data(void *options,
1697 if (data->root.size > NFS3_FHSIZE || data->root.size == 0) 1668 if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
1698 goto out_invalid_fh; 1669 goto out_invalid_fh;
1699 mntfh->size = data->root.size; 1670 mntfh->size = data->root.size;
1700 } else 1671 args->version = 3;
1672 } else {
1701 mntfh->size = NFS2_FHSIZE; 1673 mntfh->size = NFS2_FHSIZE;
1674 args->version = 2;
1675 }
1702 1676
1703 1677
1704 memcpy(mntfh->data, data->root.data, mntfh->size); 1678 memcpy(mntfh->data, data->root.data, mntfh->size);
@@ -1720,11 +1694,9 @@ static int nfs_validate_mount_data(void *options,
1720 args->acdirmin = data->acdirmin; 1694 args->acdirmin = data->acdirmin;
1721 args->acdirmax = data->acdirmax; 1695 args->acdirmax = data->acdirmax;
1722 1696
1723 memcpy(&args->nfs_server.address, &data->addr, 1697 memcpy(sap, &data->addr, sizeof(data->addr));
1724 sizeof(data->addr));
1725 args->nfs_server.addrlen = sizeof(data->addr); 1698 args->nfs_server.addrlen = sizeof(data->addr);
1726 if (!nfs_verify_server_address((struct sockaddr *) 1699 if (!nfs_verify_server_address(sap))
1727 &args->nfs_server.address))
1728 goto out_no_address; 1700 goto out_no_address;
1729 1701
1730 if (!(data->flags & NFS_MOUNT_TCP)) 1702 if (!(data->flags & NFS_MOUNT_TCP))
@@ -1772,12 +1744,18 @@ static int nfs_validate_mount_data(void *options,
1772 if (nfs_parse_mount_options((char *)options, args) == 0) 1744 if (nfs_parse_mount_options((char *)options, args) == 0)
1773 return -EINVAL; 1745 return -EINVAL;
1774 1746
1775 if (!nfs_verify_server_address((struct sockaddr *) 1747 if (!nfs_verify_server_address(sap))
1776 &args->nfs_server.address))
1777 goto out_no_address; 1748 goto out_no_address;
1778 1749
1779 nfs_set_port((struct sockaddr *)&args->nfs_server.address, 1750 if (args->version == 4)
1780 args->nfs_server.port); 1751#ifdef CONFIG_NFS_V4
1752 return nfs4_validate_text_mount_data(options,
1753 args, dev_name);
1754#else
1755 goto out_v4_not_compiled;
1756#endif
1757
1758 nfs_set_default_port(sap, args->nfs_server.port, 0);
1781 1759
1782 nfs_set_mount_transport_protocol(args); 1760 nfs_set_mount_transport_protocol(args);
1783 1761
@@ -1825,6 +1803,12 @@ out_v3_not_compiled:
1825 return -EPROTONOSUPPORT; 1803 return -EPROTONOSUPPORT;
1826#endif /* !CONFIG_NFS_V3 */ 1804#endif /* !CONFIG_NFS_V3 */
1827 1805
1806#ifndef CONFIG_NFS_V4
1807out_v4_not_compiled:
1808 dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
1809 return -EPROTONOSUPPORT;
1810#endif /* !CONFIG_NFS_V4 */
1811
1828out_nomem: 1812out_nomem:
1829 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); 1813 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
1830 return -ENOMEM; 1814 return -ENOMEM;
@@ -2120,6 +2104,14 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2120 if (error < 0) 2104 if (error < 0)
2121 goto out; 2105 goto out;
2122 2106
2107#ifdef CONFIG_NFS_V4
2108 if (data->version == 4) {
2109 error = nfs4_try_mount(flags, dev_name, data, mnt);
2110 kfree(data->client_address);
2111 goto out;
2112 }
2113#endif /* CONFIG_NFS_V4 */
2114
2123 /* Get a volume representation */ 2115 /* Get a volume representation */
2124 server = nfs_create_server(data, mntfh); 2116 server = nfs_create_server(data, mntfh);
2125 if (IS_ERR(server)) { 2117 if (IS_ERR(server)) {
@@ -2317,6 +2309,43 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2317 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); 2309 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3);
2318} 2310}
2319 2311
2312static int nfs4_validate_text_mount_data(void *options,
2313 struct nfs_parsed_mount_data *args,
2314 const char *dev_name)
2315{
2316 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2317
2318 nfs_set_default_port(sap, args->nfs_server.port, NFS_PORT);
2319
2320 nfs_validate_transport_protocol(args);
2321
2322 nfs4_validate_mount_flags(args);
2323
2324 if (args->version != 4) {
2325 dfprintk(MOUNT,
2326 "NFS4: Illegal mount version\n");
2327 return -EINVAL;
2328 }
2329
2330 if (args->auth_flavor_len > 1) {
2331 dfprintk(MOUNT,
2332 "NFS4: Too many RPC auth flavours specified\n");
2333 return -EINVAL;
2334 }
2335
2336 if (args->client_address == NULL) {
2337 dfprintk(MOUNT,
2338 "NFS4: mount program didn't pass callback address\n");
2339 return -EINVAL;
2340 }
2341
2342 return nfs_parse_devname(dev_name,
2343 &args->nfs_server.hostname,
2344 NFS4_MAXNAMLEN,
2345 &args->nfs_server.export_path,
2346 NFS4_MAXPATHLEN);
2347}
2348
2320/* 2349/*
2321 * Validate NFSv4 mount options 2350 * Validate NFSv4 mount options
2322 */ 2351 */
@@ -2324,7 +2353,7 @@ static int nfs4_validate_mount_data(void *options,
2324 struct nfs_parsed_mount_data *args, 2353 struct nfs_parsed_mount_data *args,
2325 const char *dev_name) 2354 const char *dev_name)
2326{ 2355{
2327 struct sockaddr_in *ap; 2356 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2328 struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; 2357 struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
2329 char *c; 2358 char *c;
2330 2359
@@ -2337,23 +2366,22 @@ static int nfs4_validate_mount_data(void *options,
2337 args->acregmax = NFS_DEF_ACREGMAX; 2366 args->acregmax = NFS_DEF_ACREGMAX;
2338 args->acdirmin = NFS_DEF_ACDIRMIN; 2367 args->acdirmin = NFS_DEF_ACDIRMIN;
2339 args->acdirmax = NFS_DEF_ACDIRMAX; 2368 args->acdirmax = NFS_DEF_ACDIRMAX;
2340 args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ 2369 args->nfs_server.port = NFS_UNSPEC_PORT;
2341 args->auth_flavors[0] = RPC_AUTH_UNIX; 2370 args->auth_flavors[0] = RPC_AUTH_UNIX;
2342 args->auth_flavor_len = 0; 2371 args->auth_flavor_len = 1;
2372 args->version = 4;
2343 args->minorversion = 0; 2373 args->minorversion = 0;
2344 2374
2345 switch (data->version) { 2375 switch (data->version) {
2346 case 1: 2376 case 1:
2347 ap = (struct sockaddr_in *)&args->nfs_server.address;
2348 if (data->host_addrlen > sizeof(args->nfs_server.address)) 2377 if (data->host_addrlen > sizeof(args->nfs_server.address))
2349 goto out_no_address; 2378 goto out_no_address;
2350 if (data->host_addrlen == 0) 2379 if (data->host_addrlen == 0)
2351 goto out_no_address; 2380 goto out_no_address;
2352 args->nfs_server.addrlen = data->host_addrlen; 2381 args->nfs_server.addrlen = data->host_addrlen;
2353 if (copy_from_user(ap, data->host_addr, data->host_addrlen)) 2382 if (copy_from_user(sap, data->host_addr, data->host_addrlen))
2354 return -EFAULT; 2383 return -EFAULT;
2355 if (!nfs_verify_server_address((struct sockaddr *) 2384 if (!nfs_verify_server_address(sap))
2356 &args->nfs_server.address))
2357 goto out_no_address; 2385 goto out_no_address;
2358 2386
2359 if (data->auth_flavourlen) { 2387 if (data->auth_flavourlen) {
@@ -2399,39 +2427,14 @@ static int nfs4_validate_mount_data(void *options,
2399 nfs_validate_transport_protocol(args); 2427 nfs_validate_transport_protocol(args);
2400 2428
2401 break; 2429 break;
2402 default: { 2430 default:
2403 int status;
2404
2405 if (nfs_parse_mount_options((char *)options, args) == 0) 2431 if (nfs_parse_mount_options((char *)options, args) == 0)
2406 return -EINVAL; 2432 return -EINVAL;
2407 2433
2408 if (!nfs_verify_server_address((struct sockaddr *) 2434 if (!nfs_verify_server_address(sap))
2409 &args->nfs_server.address))
2410 return -EINVAL; 2435 return -EINVAL;
2411 2436
2412 nfs_set_port((struct sockaddr *)&args->nfs_server.address, 2437 return nfs4_validate_text_mount_data(options, args, dev_name);
2413 args->nfs_server.port);
2414
2415 nfs_validate_transport_protocol(args);
2416
2417 nfs4_validate_mount_flags(args);
2418
2419 if (args->auth_flavor_len > 1)
2420 goto out_inval_auth;
2421
2422 if (args->client_address == NULL)
2423 goto out_no_client_address;
2424
2425 status = nfs_parse_devname(dev_name,
2426 &args->nfs_server.hostname,
2427 NFS4_MAXNAMLEN,
2428 &args->nfs_server.export_path,
2429 NFS4_MAXPATHLEN);
2430 if (status < 0)
2431 return status;
2432
2433 break;
2434 }
2435 } 2438 }
2436 2439
2437 return 0; 2440 return 0;
@@ -2448,10 +2451,6 @@ out_inval_auth:
2448out_no_address: 2451out_no_address:
2449 dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); 2452 dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
2450 return -EINVAL; 2453 return -EINVAL;
2451
2452out_no_client_address:
2453 dfprintk(MOUNT, "NFS4: mount program didn't pass callback address\n");
2454 return -EINVAL;
2455} 2454}
2456 2455
2457/* 2456/*
@@ -2618,6 +2617,34 @@ out_err:
2618 return ret; 2617 return ret;
2619} 2618}
2620 2619
2620static int nfs4_try_mount(int flags, const char *dev_name,
2621 struct nfs_parsed_mount_data *data,
2622 struct vfsmount *mnt)
2623{
2624 char *export_path;
2625 struct vfsmount *root_mnt;
2626 int error;
2627
2628 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2629
2630 export_path = data->nfs_server.export_path;
2631 data->nfs_server.export_path = "/";
2632 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
2633 data->nfs_server.hostname);
2634 data->nfs_server.export_path = export_path;
2635
2636 error = PTR_ERR(root_mnt);
2637 if (IS_ERR(root_mnt))
2638 goto out;
2639
2640 error = nfs_follow_remote_path(root_mnt, export_path, mnt);
2641
2642out:
2643 dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", error,
2644 error != 0 ? " [error]" : "");
2645 return error;
2646}
2647
2621/* 2648/*
2622 * Get the superblock for an NFS4 mountpoint 2649 * Get the superblock for an NFS4 mountpoint
2623 */ 2650 */
@@ -2625,8 +2652,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2625 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2652 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
2626{ 2653{
2627 struct nfs_parsed_mount_data *data; 2654 struct nfs_parsed_mount_data *data;
2628 char *export_path;
2629 struct vfsmount *root_mnt;
2630 int error = -ENOMEM; 2655 int error = -ENOMEM;
2631 2656
2632 data = kzalloc(sizeof(*data), GFP_KERNEL); 2657 data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -2638,17 +2663,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2638 if (error < 0) 2663 if (error < 0)
2639 goto out; 2664 goto out;
2640 2665
2641 export_path = data->nfs_server.export_path; 2666 error = nfs4_try_mount(flags, dev_name, data, mnt);
2642 data->nfs_server.export_path = "/";
2643 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
2644 data->nfs_server.hostname);
2645 data->nfs_server.export_path = export_path;
2646
2647 error = PTR_ERR(root_mnt);
2648 if (IS_ERR(root_mnt))
2649 goto out;
2650
2651 error = nfs_follow_remote_path(root_mnt, export_path, mnt);
2652 2667
2653out: 2668out:
2654 kfree(data->client_address); 2669 kfree(data->client_address);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index a34fae21fe10..120acadc6a84 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -13,6 +13,7 @@
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/writeback.h> 14#include <linux/writeback.h>
15#include <linux/swap.h> 15#include <linux/swap.h>
16#include <linux/migrate.h>
16 17
17#include <linux/sunrpc/clnt.h> 18#include <linux/sunrpc/clnt.h>
18#include <linux/nfs_fs.h> 19#include <linux/nfs_fs.h>
@@ -26,6 +27,7 @@
26#include "internal.h" 27#include "internal.h"
27#include "iostat.h" 28#include "iostat.h"
28#include "nfs4_fs.h" 29#include "nfs4_fs.h"
30#include "fscache.h"
29 31
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 32#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 33
@@ -218,24 +220,17 @@ static void nfs_end_page_writeback(struct page *page)
218 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 220 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
219} 221}
220 222
221/* 223static struct nfs_page *nfs_find_and_lock_request(struct page *page)
222 * Find an associated nfs write request, and prepare to flush it out
223 * May return an error if the user signalled nfs_wait_on_request().
224 */
225static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
226 struct page *page)
227{ 224{
228 struct inode *inode = page->mapping->host; 225 struct inode *inode = page->mapping->host;
229 struct nfs_page *req; 226 struct nfs_page *req;
230 int ret; 227 int ret;
231 228
232 spin_lock(&inode->i_lock); 229 spin_lock(&inode->i_lock);
233 for(;;) { 230 for (;;) {
234 req = nfs_page_find_request_locked(page); 231 req = nfs_page_find_request_locked(page);
235 if (req == NULL) { 232 if (req == NULL)
236 spin_unlock(&inode->i_lock); 233 break;
237 return 0;
238 }
239 if (nfs_set_page_tag_locked(req)) 234 if (nfs_set_page_tag_locked(req))
240 break; 235 break;
241 /* Note: If we hold the page lock, as is the case in nfs_writepage, 236 /* Note: If we hold the page lock, as is the case in nfs_writepage,
@@ -247,23 +242,40 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
247 ret = nfs_wait_on_request(req); 242 ret = nfs_wait_on_request(req);
248 nfs_release_request(req); 243 nfs_release_request(req);
249 if (ret != 0) 244 if (ret != 0)
250 return ret; 245 return ERR_PTR(ret);
251 spin_lock(&inode->i_lock); 246 spin_lock(&inode->i_lock);
252 } 247 }
253 if (test_bit(PG_CLEAN, &req->wb_flags)) {
254 spin_unlock(&inode->i_lock);
255 BUG();
256 }
257 if (nfs_set_page_writeback(page) != 0) {
258 spin_unlock(&inode->i_lock);
259 BUG();
260 }
261 spin_unlock(&inode->i_lock); 248 spin_unlock(&inode->i_lock);
249 return req;
250}
251
252/*
253 * Find an associated nfs write request, and prepare to flush it out
254 * May return an error if the user signalled nfs_wait_on_request().
255 */
256static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
257 struct page *page)
258{
259 struct nfs_page *req;
260 int ret = 0;
261
262 req = nfs_find_and_lock_request(page);
263 if (!req)
264 goto out;
265 ret = PTR_ERR(req);
266 if (IS_ERR(req))
267 goto out;
268
269 ret = nfs_set_page_writeback(page);
270 BUG_ON(ret != 0);
271 BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));
272
262 if (!nfs_pageio_add_request(pgio, req)) { 273 if (!nfs_pageio_add_request(pgio, req)) {
263 nfs_redirty_request(req); 274 nfs_redirty_request(req);
264 return pgio->pg_error; 275 ret = pgio->pg_error;
265 } 276 }
266 return 0; 277out:
278 return ret;
267} 279}
268 280
269static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) 281static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
@@ -1580,6 +1592,41 @@ int nfs_wb_page(struct inode *inode, struct page* page)
1580 return nfs_wb_page_priority(inode, page, FLUSH_STABLE); 1592 return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
1581} 1593}
1582 1594
1595#ifdef CONFIG_MIGRATION
1596int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1597 struct page *page)
1598{
1599 struct nfs_page *req;
1600 int ret;
1601
1602 if (PageFsCache(page))
1603 nfs_fscache_release_page(page, GFP_KERNEL);
1604
1605 req = nfs_find_and_lock_request(page);
1606 ret = PTR_ERR(req);
1607 if (IS_ERR(req))
1608 goto out;
1609
1610 ret = migrate_page(mapping, newpage, page);
1611 if (!req)
1612 goto out;
1613 if (ret)
1614 goto out_unlock;
1615 page_cache_get(newpage);
1616 req->wb_page = newpage;
1617 SetPagePrivate(newpage);
1618 set_page_private(newpage, page_private(page));
1619 ClearPagePrivate(page);
1620 set_page_private(page, 0);
1621 page_cache_release(page);
1622out_unlock:
1623 nfs_clear_page_tag_locked(req);
1624 nfs_release_request(req);
1625out:
1626 return ret;
1627}
1628#endif
1629
1583int __init nfs_init_writepagecache(void) 1630int __init nfs_init_writepagecache(void)
1584{ 1631{
1585 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1632 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 5573508f707f..36fcabbf5186 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
34 int flags = nfsexp_flags(rqstp, exp); 34 int flags = nfsexp_flags(rqstp, exp);
35 int ret; 35 int ret;
36 36
37 validate_process_creds();
38
37 /* discard any old override before preparing the new set */ 39 /* discard any old override before preparing the new set */
38 revert_creds(get_cred(current->real_cred)); 40 revert_creds(get_cred(current->real_cred));
39 new = prepare_creds(); 41 new = prepare_creds();
@@ -86,8 +88,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
86 else 88 else
87 new->cap_effective = cap_raise_nfsd_set(new->cap_effective, 89 new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
88 new->cap_permitted); 90 new->cap_permitted);
91 validate_process_creds();
89 put_cred(override_creds(new)); 92 put_cred(override_creds(new));
90 put_cred(new); 93 put_cred(new);
94 validate_process_creds();
91 return 0; 95 return 0;
92 96
93oom: 97oom:
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index b92a27629fb7..d9462643155c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -85,6 +85,11 @@ static void expkey_request(struct cache_detail *cd,
85 (*bpp)[-1] = '\n'; 85 (*bpp)[-1] = '\n';
86} 86}
87 87
88static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
89{
90 return sunrpc_cache_pipe_upcall(cd, h, expkey_request);
91}
92
88static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); 93static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old);
89static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); 94static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *);
90static struct cache_detail svc_expkey_cache; 95static struct cache_detail svc_expkey_cache;
@@ -259,7 +264,7 @@ static struct cache_detail svc_expkey_cache = {
259 .hash_table = expkey_table, 264 .hash_table = expkey_table,
260 .name = "nfsd.fh", 265 .name = "nfsd.fh",
261 .cache_put = expkey_put, 266 .cache_put = expkey_put,
262 .cache_request = expkey_request, 267 .cache_upcall = expkey_upcall,
263 .cache_parse = expkey_parse, 268 .cache_parse = expkey_parse,
264 .cache_show = expkey_show, 269 .cache_show = expkey_show,
265 .match = expkey_match, 270 .match = expkey_match,
@@ -355,6 +360,11 @@ static void svc_export_request(struct cache_detail *cd,
355 (*bpp)[-1] = '\n'; 360 (*bpp)[-1] = '\n';
356} 361}
357 362
363static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
364{
365 return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
366}
367
358static struct svc_export *svc_export_update(struct svc_export *new, 368static struct svc_export *svc_export_update(struct svc_export *new,
359 struct svc_export *old); 369 struct svc_export *old);
360static struct svc_export *svc_export_lookup(struct svc_export *); 370static struct svc_export *svc_export_lookup(struct svc_export *);
@@ -724,7 +734,7 @@ struct cache_detail svc_export_cache = {
724 .hash_table = export_table, 734 .hash_table = export_table,
725 .name = "nfsd.export", 735 .name = "nfsd.export",
726 .cache_put = svc_export_put, 736 .cache_put = svc_export_put,
727 .cache_request = svc_export_request, 737 .cache_upcall = svc_export_upcall,
728 .cache_parse = svc_export_parse, 738 .cache_parse = svc_export_parse,
729 .cache_show = svc_export_show, 739 .cache_show = svc_export_show,
730 .match = svc_export_match, 740 .match = svc_export_match,
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5b398421b051..cdfa86fa1471 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -146,6 +146,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
146} 146}
147 147
148static int 148static int
149idtoname_upcall(struct cache_detail *cd, struct cache_head *ch)
150{
151 return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request);
152}
153
154static int
149idtoname_match(struct cache_head *ca, struct cache_head *cb) 155idtoname_match(struct cache_head *ca, struct cache_head *cb)
150{ 156{
151 struct ent *a = container_of(ca, struct ent, h); 157 struct ent *a = container_of(ca, struct ent, h);
@@ -175,10 +181,10 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
175} 181}
176 182
177static void 183static void
178warn_no_idmapd(struct cache_detail *detail) 184warn_no_idmapd(struct cache_detail *detail, int has_died)
179{ 185{
180 printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", 186 printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n",
181 detail->last_close? "died" : "not been started"); 187 has_died ? "died" : "not been started");
182} 188}
183 189
184 190
@@ -192,7 +198,7 @@ static struct cache_detail idtoname_cache = {
192 .hash_table = idtoname_table, 198 .hash_table = idtoname_table,
193 .name = "nfs4.idtoname", 199 .name = "nfs4.idtoname",
194 .cache_put = ent_put, 200 .cache_put = ent_put,
195 .cache_request = idtoname_request, 201 .cache_upcall = idtoname_upcall,
196 .cache_parse = idtoname_parse, 202 .cache_parse = idtoname_parse,
197 .cache_show = idtoname_show, 203 .cache_show = idtoname_show,
198 .warn_no_listener = warn_no_idmapd, 204 .warn_no_listener = warn_no_idmapd,
@@ -325,6 +331,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
325} 331}
326 332
327static int 333static int
334nametoid_upcall(struct cache_detail *cd, struct cache_head *ch)
335{
336 return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request);
337}
338
339static int
328nametoid_match(struct cache_head *ca, struct cache_head *cb) 340nametoid_match(struct cache_head *ca, struct cache_head *cb)
329{ 341{
330 struct ent *a = container_of(ca, struct ent, h); 342 struct ent *a = container_of(ca, struct ent, h);
@@ -363,7 +375,7 @@ static struct cache_detail nametoid_cache = {
363 .hash_table = nametoid_table, 375 .hash_table = nametoid_table,
364 .name = "nfs4.nametoid", 376 .name = "nfs4.nametoid",
365 .cache_put = ent_put, 377 .cache_put = ent_put,
366 .cache_request = nametoid_request, 378 .cache_upcall = nametoid_upcall,
367 .cache_parse = nametoid_parse, 379 .cache_parse = nametoid_parse,
368 .cache_show = nametoid_show, 380 .cache_show = nametoid_show,
369 .warn_no_listener = warn_no_idmapd, 381 .warn_no_listener = warn_no_idmapd,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6d0847562d87..7e906c5b7671 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -37,6 +37,7 @@
37#include <linux/nfsd/xdr.h> 37#include <linux/nfsd/xdr.h>
38#include <linux/nfsd/syscall.h> 38#include <linux/nfsd/syscall.h>
39#include <linux/lockd/lockd.h> 39#include <linux/lockd/lockd.h>
40#include <linux/sunrpc/clnt.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include <net/ipv6.h> 43#include <net/ipv6.h>
@@ -490,22 +491,18 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
490 * 491 *
491 * Input: 492 * Input:
492 * buf: '\n'-terminated C string containing a 493 * buf: '\n'-terminated C string containing a
493 * presentation format IPv4 address 494 * presentation format IP address
494 * size: length of C string in @buf 495 * size: length of C string in @buf
495 * Output: 496 * Output:
496 * On success: returns zero if all specified locks were released; 497 * On success: returns zero if all specified locks were released;
497 * returns one if one or more locks were not released 498 * returns one if one or more locks were not released
498 * On error: return code is negative errno value 499 * On error: return code is negative errno value
499 *
500 * Note: Only AF_INET client addresses are passed in
501 */ 500 */
502static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) 501static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
503{ 502{
504 struct sockaddr_in sin = { 503 struct sockaddr_storage address;
505 .sin_family = AF_INET, 504 struct sockaddr *sap = (struct sockaddr *)&address;
506 }; 505 size_t salen = sizeof(address);
507 int b1, b2, b3, b4;
508 char c;
509 char *fo_path; 506 char *fo_path;
510 507
511 /* sanity check */ 508 /* sanity check */
@@ -519,14 +516,10 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
519 if (qword_get(&buf, fo_path, size) < 0) 516 if (qword_get(&buf, fo_path, size) < 0)
520 return -EINVAL; 517 return -EINVAL;
521 518
522 /* get ipv4 address */ 519 if (rpc_pton(fo_path, size, sap, salen) == 0)
523 if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
524 return -EINVAL;
525 if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
526 return -EINVAL; 520 return -EINVAL;
527 sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
528 521
529 return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin); 522 return nlmsvc_unlock_all_by_ip(sap);
530} 523}
531 524
532/** 525/**
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 492c79b7800b..24d58adfe5fd 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -496,7 +496,9 @@ nfsd(void *vrqstp)
496 /* Lock the export hash tables for reading. */ 496 /* Lock the export hash tables for reading. */
497 exp_readlock(); 497 exp_readlock();
498 498
499 validate_process_creds();
499 svc_process(rqstp); 500 svc_process(rqstp);
501 validate_process_creds();
500 502
501 /* Unlock export hash tables */ 503 /* Unlock export hash tables */
502 exp_readunlock(); 504 exp_readunlock();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 23341c1063bc..8fa09bfbcba7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -684,6 +684,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
684 __be32 err; 684 __be32 err;
685 int host_err; 685 int host_err;
686 686
687 validate_process_creds();
688
687 /* 689 /*
688 * If we get here, then the client has already done an "open", 690 * If we get here, then the client has already done an "open",
689 * and (hopefully) checked permission - so allow OWNER_OVERRIDE 691 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
@@ -740,6 +742,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
740out_nfserr: 742out_nfserr:
741 err = nfserrno(host_err); 743 err = nfserrno(host_err);
742out: 744out:
745 validate_process_creds();
743 return err; 746 return err;
744} 747}
745 748
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 72da095d4009..251da07b2a1d 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -1,6 +1,6 @@
1config NILFS2_FS 1config NILFS2_FS
2 tristate "NILFS2 file system support (EXPERIMENTAL)" 2 tristate "NILFS2 file system support (EXPERIMENTAL)"
3 depends on BLOCK && EXPERIMENTAL 3 depends on EXPERIMENTAL
4 select CRC32 4 select CRC32
5 help 5 help
6 NILFS2 is a log-structured file system (LFS) supporting continuous 6 NILFS2 is a log-structured file system (LFS) supporting continuous
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 99d58a028b94..08834df6ec68 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -36,6 +36,26 @@ struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
36 return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode)); 36 return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode));
37} 37}
38 38
39/**
40 * nilfs_bmap_lookup_at_level - find a data block or node block
41 * @bmap: bmap
42 * @key: key
43 * @level: level
44 * @ptrp: place to store the value associated to @key
45 *
46 * Description: nilfs_bmap_lookup_at_level() finds a record whose key
47 * matches @key in the block at @level of the bmap.
48 *
49 * Return Value: On success, 0 is returned and the record associated with @key
50 * is stored in the place pointed by @ptrp. On error, one of the following
51 * negative error codes is returned.
52 *
53 * %-EIO - I/O error.
54 *
55 * %-ENOMEM - Insufficient amount of memory available.
56 *
57 * %-ENOENT - A record associated with @key does not exist.
58 */
39int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level, 59int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
40 __u64 *ptrp) 60 __u64 *ptrp)
41{ 61{
@@ -69,39 +89,6 @@ int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
69 return ret; 89 return ret;
70} 90}
71 91
72/**
73 * nilfs_bmap_lookup - find a record
74 * @bmap: bmap
75 * @key: key
76 * @recp: pointer to record
77 *
78 * Description: nilfs_bmap_lookup() finds a record whose key matches @key in
79 * @bmap.
80 *
81 * Return Value: On success, 0 is returned and the record associated with @key
82 * is stored in the place pointed by @recp. On error, one of the following
83 * negative error codes is returned.
84 *
85 * %-EIO - I/O error.
86 *
87 * %-ENOMEM - Insufficient amount of memory available.
88 *
89 * %-ENOENT - A record associated with @key does not exist.
90 */
91int nilfs_bmap_lookup(struct nilfs_bmap *bmap,
92 unsigned long key,
93 unsigned long *recp)
94{
95 __u64 ptr;
96 int ret;
97
98 /* XXX: use macro for level 1 */
99 ret = nilfs_bmap_lookup_at_level(bmap, key, 1, &ptr);
100 if (recp != NULL)
101 *recp = ptr;
102 return ret;
103}
104
105static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) 92static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
106{ 93{
107 __u64 keys[NILFS_BMAP_SMALL_HIGH + 1]; 94 __u64 keys[NILFS_BMAP_SMALL_HIGH + 1];
@@ -469,104 +456,6 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
469 (entries_per_group / NILFS_BMAP_GROUP_DIV); 456 (entries_per_group / NILFS_BMAP_GROUP_DIV);
470} 457}
471 458
472int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap,
473 union nilfs_bmap_ptr_req *req)
474{
475 return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
476}
477
478void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap,
479 union nilfs_bmap_ptr_req *req)
480{
481 nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
482}
483
484void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap,
485 union nilfs_bmap_ptr_req *req)
486{
487 nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
488}
489
490int nilfs_bmap_start_v(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *req,
491 sector_t blocknr)
492{
493 struct inode *dat = nilfs_bmap_get_dat(bmap);
494 int ret;
495
496 ret = nilfs_dat_prepare_start(dat, &req->bpr_req);
497 if (likely(!ret))
498 nilfs_dat_commit_start(dat, &req->bpr_req, blocknr);
499 return ret;
500}
501
502int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap,
503 union nilfs_bmap_ptr_req *req)
504{
505 return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
506}
507
508void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap,
509 union nilfs_bmap_ptr_req *req)
510{
511 nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req,
512 bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
513}
514
515void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap,
516 union nilfs_bmap_ptr_req *req)
517{
518 nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
519}
520
521int nilfs_bmap_move_v(const struct nilfs_bmap *bmap, __u64 vblocknr,
522 sector_t blocknr)
523{
524 return nilfs_dat_move(nilfs_bmap_get_dat(bmap), vblocknr, blocknr);
525}
526
527int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr)
528{
529 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr);
530}
531
532int nilfs_bmap_prepare_update_v(struct nilfs_bmap *bmap,
533 union nilfs_bmap_ptr_req *oldreq,
534 union nilfs_bmap_ptr_req *newreq)
535{
536 struct inode *dat = nilfs_bmap_get_dat(bmap);
537 int ret;
538
539 ret = nilfs_dat_prepare_end(dat, &oldreq->bpr_req);
540 if (ret < 0)
541 return ret;
542 ret = nilfs_dat_prepare_alloc(dat, &newreq->bpr_req);
543 if (ret < 0)
544 nilfs_dat_abort_end(dat, &oldreq->bpr_req);
545
546 return ret;
547}
548
549void nilfs_bmap_commit_update_v(struct nilfs_bmap *bmap,
550 union nilfs_bmap_ptr_req *oldreq,
551 union nilfs_bmap_ptr_req *newreq)
552{
553 struct inode *dat = nilfs_bmap_get_dat(bmap);
554
555 nilfs_dat_commit_end(dat, &oldreq->bpr_req,
556 bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
557 nilfs_dat_commit_alloc(dat, &newreq->bpr_req);
558}
559
560void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap,
561 union nilfs_bmap_ptr_req *oldreq,
562 union nilfs_bmap_ptr_req *newreq)
563{
564 struct inode *dat = nilfs_bmap_get_dat(bmap);
565
566 nilfs_dat_abort_end(dat, &oldreq->bpr_req);
567 nilfs_dat_abort_alloc(dat, &newreq->bpr_req);
568}
569
570static struct lock_class_key nilfs_bmap_dat_lock_key; 459static struct lock_class_key nilfs_bmap_dat_lock_key;
571static struct lock_class_key nilfs_bmap_mdt_lock_key; 460static struct lock_class_key nilfs_bmap_mdt_lock_key;
572 461
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index b2890cdcef12..9980d7dbab91 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -28,6 +28,7 @@
28#include <linux/buffer_head.h> 28#include <linux/buffer_head.h>
29#include <linux/nilfs2_fs.h> 29#include <linux/nilfs2_fs.h>
30#include "alloc.h" 30#include "alloc.h"
31#include "dat.h"
31 32
32#define NILFS_BMAP_INVALID_PTR 0 33#define NILFS_BMAP_INVALID_PTR 0
33 34
@@ -141,7 +142,6 @@ struct nilfs_bmap {
141int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); 142int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
142int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); 143int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
143void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *); 144void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
144int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *);
145int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned); 145int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned);
146int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long); 146int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long);
147int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long); 147int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long);
@@ -160,90 +160,76 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *);
160void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); 160void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *);
161 161
162 162
163static inline int nilfs_bmap_lookup(struct nilfs_bmap *bmap, __u64 key,
164 __u64 *ptr)
165{
166 return nilfs_bmap_lookup_at_level(bmap, key, 1, ptr);
167}
168
163/* 169/*
164 * Internal use only 170 * Internal use only
165 */ 171 */
166struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *); 172struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *);
167int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *,
168 union nilfs_bmap_ptr_req *);
169void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *,
170 union nilfs_bmap_ptr_req *);
171void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *,
172 union nilfs_bmap_ptr_req *);
173 173
174static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap, 174static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap,
175 union nilfs_bmap_ptr_req *req) 175 union nilfs_bmap_ptr_req *req,
176 struct inode *dat)
176{ 177{
177 if (NILFS_BMAP_USE_VBN(bmap)) 178 if (dat)
178 return nilfs_bmap_prepare_alloc_v(bmap, req); 179 return nilfs_dat_prepare_alloc(dat, &req->bpr_req);
179 /* ignore target ptr */ 180 /* ignore target ptr */
180 req->bpr_ptr = bmap->b_last_allocated_ptr++; 181 req->bpr_ptr = bmap->b_last_allocated_ptr++;
181 return 0; 182 return 0;
182} 183}
183 184
184static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap, 185static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap,
185 union nilfs_bmap_ptr_req *req) 186 union nilfs_bmap_ptr_req *req,
187 struct inode *dat)
186{ 188{
187 if (NILFS_BMAP_USE_VBN(bmap)) 189 if (dat)
188 nilfs_bmap_commit_alloc_v(bmap, req); 190 nilfs_dat_commit_alloc(dat, &req->bpr_req);
189} 191}
190 192
191static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap, 193static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap,
192 union nilfs_bmap_ptr_req *req) 194 union nilfs_bmap_ptr_req *req,
195 struct inode *dat)
193{ 196{
194 if (NILFS_BMAP_USE_VBN(bmap)) 197 if (dat)
195 nilfs_bmap_abort_alloc_v(bmap, req); 198 nilfs_dat_abort_alloc(dat, &req->bpr_req);
196 else 199 else
197 bmap->b_last_allocated_ptr--; 200 bmap->b_last_allocated_ptr--;
198} 201}
199 202
200int nilfs_bmap_prepare_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
201void nilfs_bmap_commit_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
202void nilfs_bmap_abort_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
203
204static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap, 203static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap,
205 union nilfs_bmap_ptr_req *req) 204 union nilfs_bmap_ptr_req *req,
205 struct inode *dat)
206{ 206{
207 return NILFS_BMAP_USE_VBN(bmap) ? 207 return dat ? nilfs_dat_prepare_end(dat, &req->bpr_req) : 0;
208 nilfs_bmap_prepare_end_v(bmap, req) : 0;
209} 208}
210 209
211static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap, 210static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap,
212 union nilfs_bmap_ptr_req *req) 211 union nilfs_bmap_ptr_req *req,
212 struct inode *dat)
213{ 213{
214 if (NILFS_BMAP_USE_VBN(bmap)) 214 if (dat)
215 nilfs_bmap_commit_end_v(bmap, req); 215 nilfs_dat_commit_end(dat, &req->bpr_req,
216 bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
216} 217}
217 218
218static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap, 219static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap,
219 union nilfs_bmap_ptr_req *req) 220 union nilfs_bmap_ptr_req *req,
221 struct inode *dat)
220{ 222{
221 if (NILFS_BMAP_USE_VBN(bmap)) 223 if (dat)
222 nilfs_bmap_abort_end_v(bmap, req); 224 nilfs_dat_abort_end(dat, &req->bpr_req);
223} 225}
224 226
225int nilfs_bmap_start_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *,
226 sector_t);
227int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t);
228int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64);
229
230
231__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, 227__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
232 const struct buffer_head *); 228 const struct buffer_head *);
233 229
234__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64); 230__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64);
235__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *); 231__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *);
236 232
237int nilfs_bmap_prepare_update_v(struct nilfs_bmap *,
238 union nilfs_bmap_ptr_req *,
239 union nilfs_bmap_ptr_req *);
240void nilfs_bmap_commit_update_v(struct nilfs_bmap *,
241 union nilfs_bmap_ptr_req *,
242 union nilfs_bmap_ptr_req *);
243void nilfs_bmap_abort_update_v(struct nilfs_bmap *,
244 union nilfs_bmap_ptr_req *,
245 union nilfs_bmap_ptr_req *);
246
247void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int); 233void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int);
248void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int); 234void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int);
249 235
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index aa412724b64e..e25b507a474f 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -71,21 +71,17 @@ void nilfs_btree_path_cache_destroy(void)
71 kmem_cache_destroy(nilfs_btree_path_cache); 71 kmem_cache_destroy(nilfs_btree_path_cache);
72} 72}
73 73
74static inline struct nilfs_btree_path * 74static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75nilfs_btree_alloc_path(const struct nilfs_btree *btree)
76{ 75{
77 return (struct nilfs_btree_path *) 76 return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
78 kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
79} 77}
80 78
81static inline void nilfs_btree_free_path(const struct nilfs_btree *btree, 79static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
82 struct nilfs_btree_path *path)
83{ 80{
84 kmem_cache_free(nilfs_btree_path_cache, path); 81 kmem_cache_free(nilfs_btree_path_cache, path);
85} 82}
86 83
87static void nilfs_btree_init_path(const struct nilfs_btree *btree, 84static void nilfs_btree_init_path(struct nilfs_btree_path *path)
88 struct nilfs_btree_path *path)
89{ 85{
90 int level; 86 int level;
91 87
@@ -101,26 +97,13 @@ static void nilfs_btree_init_path(const struct nilfs_btree *btree,
101 } 97 }
102} 98}
103 99
104static void nilfs_btree_clear_path(const struct nilfs_btree *btree, 100static void nilfs_btree_release_path(struct nilfs_btree_path *path)
105 struct nilfs_btree_path *path)
106{ 101{
107 int level; 102 int level;
108 103
109 for (level = NILFS_BTREE_LEVEL_DATA; 104 for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
110 level < NILFS_BTREE_LEVEL_MAX; 105 level++)
111 level++) { 106 brelse(path[level].bp_bh);
112 if (path[level].bp_bh != NULL) {
113 brelse(path[level].bp_bh);
114 path[level].bp_bh = NULL;
115 }
116 /* sib_bh is released or deleted by prepare or commit
117 * operations. */
118 path[level].bp_sib_bh = NULL;
119 path[level].bp_index = 0;
120 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
122 path[level].bp_op = NULL;
123 }
124} 107}
125 108
126/* 109/*
@@ -148,129 +131,110 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
148} 131}
149 132
150static inline int 133static inline int
151nilfs_btree_node_get_flags(const struct nilfs_btree *btree, 134nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
152 const struct nilfs_btree_node *node)
153{ 135{
154 return node->bn_flags; 136 return node->bn_flags;
155} 137}
156 138
157static inline void 139static inline void
158nilfs_btree_node_set_flags(struct nilfs_btree *btree, 140nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
159 struct nilfs_btree_node *node,
160 int flags)
161{ 141{
162 node->bn_flags = flags; 142 node->bn_flags = flags;
163} 143}
164 144
165static inline int nilfs_btree_node_root(const struct nilfs_btree *btree, 145static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
166 const struct nilfs_btree_node *node)
167{ 146{
168 return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT; 147 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
169} 148}
170 149
171static inline int 150static inline int
172nilfs_btree_node_get_level(const struct nilfs_btree *btree, 151nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
173 const struct nilfs_btree_node *node)
174{ 152{
175 return node->bn_level; 153 return node->bn_level;
176} 154}
177 155
178static inline void 156static inline void
179nilfs_btree_node_set_level(struct nilfs_btree *btree, 157nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
180 struct nilfs_btree_node *node,
181 int level)
182{ 158{
183 node->bn_level = level; 159 node->bn_level = level;
184} 160}
185 161
186static inline int 162static inline int
187nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree, 163nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
188 const struct nilfs_btree_node *node)
189{ 164{
190 return le16_to_cpu(node->bn_nchildren); 165 return le16_to_cpu(node->bn_nchildren);
191} 166}
192 167
193static inline void 168static inline void
194nilfs_btree_node_set_nchildren(struct nilfs_btree *btree, 169nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
195 struct nilfs_btree_node *node,
196 int nchildren)
197{ 170{
198 node->bn_nchildren = cpu_to_le16(nchildren); 171 node->bn_nchildren = cpu_to_le16(nchildren);
199} 172}
200 173
201static inline int 174static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
202nilfs_btree_node_size(const struct nilfs_btree *btree)
203{ 175{
204 return 1 << btree->bt_bmap.b_inode->i_blkbits; 176 return 1 << btree->bt_bmap.b_inode->i_blkbits;
205} 177}
206 178
207static inline int 179static inline int
208nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree, 180nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
209 const struct nilfs_btree_node *node) 181 const struct nilfs_btree *btree)
210{ 182{
211 return nilfs_btree_node_root(btree, node) ? 183 return nilfs_btree_node_root(node) ?
212 NILFS_BTREE_ROOT_NCHILDREN_MIN : 184 NILFS_BTREE_ROOT_NCHILDREN_MIN :
213 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 185 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
214} 186}
215 187
216static inline int 188static inline int
217nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree, 189nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
218 const struct nilfs_btree_node *node) 190 const struct nilfs_btree *btree)
219{ 191{
220 return nilfs_btree_node_root(btree, node) ? 192 return nilfs_btree_node_root(node) ?
221 NILFS_BTREE_ROOT_NCHILDREN_MAX : 193 NILFS_BTREE_ROOT_NCHILDREN_MAX :
222 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree)); 194 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
223} 195}
224 196
225static inline __le64 * 197static inline __le64 *
226nilfs_btree_node_dkeys(const struct nilfs_btree *btree, 198nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
227 const struct nilfs_btree_node *node)
228{ 199{
229 return (__le64 *)((char *)(node + 1) + 200 return (__le64 *)((char *)(node + 1) +
230 (nilfs_btree_node_root(btree, node) ? 201 (nilfs_btree_node_root(node) ?
231 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 202 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
232} 203}
233 204
234static inline __le64 * 205static inline __le64 *
235nilfs_btree_node_dptrs(const struct nilfs_btree *btree, 206nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
236 const struct nilfs_btree_node *node) 207 const struct nilfs_btree *btree)
237{ 208{
238 return (__le64 *)(nilfs_btree_node_dkeys(btree, node) + 209 return (__le64 *)(nilfs_btree_node_dkeys(node) +
239 nilfs_btree_node_nchildren_max(btree, node)); 210 nilfs_btree_node_nchildren_max(node, btree));
240} 211}
241 212
242static inline __u64 213static inline __u64
243nilfs_btree_node_get_key(const struct nilfs_btree *btree, 214nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
244 const struct nilfs_btree_node *node, int index)
245{ 215{
246 return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) + 216 return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
247 index));
248} 217}
249 218
250static inline void 219static inline void
251nilfs_btree_node_set_key(struct nilfs_btree *btree, 220nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
252 struct nilfs_btree_node *node, int index, __u64 key)
253{ 221{
254 *(nilfs_btree_node_dkeys(btree, node) + index) = 222 *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
255 nilfs_bmap_key_to_dkey(key);
256} 223}
257 224
258static inline __u64 225static inline __u64
259nilfs_btree_node_get_ptr(const struct nilfs_btree *btree, 226nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
260 const struct nilfs_btree_node *node, 227 const struct nilfs_btree_node *node, int index)
261 int index)
262{ 228{
263 return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) + 229 return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
264 index)); 230 index));
265} 231}
266 232
267static inline void 233static inline void
268nilfs_btree_node_set_ptr(struct nilfs_btree *btree, 234nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
269 struct nilfs_btree_node *node, 235 struct nilfs_btree_node *node, int index, __u64 ptr)
270 int index,
271 __u64 ptr)
272{ 236{
273 *(nilfs_btree_node_dptrs(btree, node) + index) = 237 *(nilfs_btree_node_dptrs(node, btree) + index) =
274 nilfs_bmap_ptr_to_dptr(ptr); 238 nilfs_bmap_ptr_to_dptr(ptr);
275} 239}
276 240
@@ -283,12 +247,12 @@ static void nilfs_btree_node_init(struct nilfs_btree *btree,
283 __le64 *dptrs; 247 __le64 *dptrs;
284 int i; 248 int i;
285 249
286 nilfs_btree_node_set_flags(btree, node, flags); 250 nilfs_btree_node_set_flags(node, flags);
287 nilfs_btree_node_set_level(btree, node, level); 251 nilfs_btree_node_set_level(node, level);
288 nilfs_btree_node_set_nchildren(btree, node, nchildren); 252 nilfs_btree_node_set_nchildren(node, nchildren);
289 253
290 dkeys = nilfs_btree_node_dkeys(btree, node); 254 dkeys = nilfs_btree_node_dkeys(node);
291 dptrs = nilfs_btree_node_dptrs(btree, node); 255 dptrs = nilfs_btree_node_dptrs(node, btree);
292 for (i = 0; i < nchildren; i++) { 256 for (i = 0; i < nchildren; i++) {
293 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]); 257 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
294 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]); 258 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
@@ -305,13 +269,13 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
305 __le64 *ldptrs, *rdptrs; 269 __le64 *ldptrs, *rdptrs;
306 int lnchildren, rnchildren; 270 int lnchildren, rnchildren;
307 271
308 ldkeys = nilfs_btree_node_dkeys(btree, left); 272 ldkeys = nilfs_btree_node_dkeys(left);
309 ldptrs = nilfs_btree_node_dptrs(btree, left); 273 ldptrs = nilfs_btree_node_dptrs(left, btree);
310 lnchildren = nilfs_btree_node_get_nchildren(btree, left); 274 lnchildren = nilfs_btree_node_get_nchildren(left);
311 275
312 rdkeys = nilfs_btree_node_dkeys(btree, right); 276 rdkeys = nilfs_btree_node_dkeys(right);
313 rdptrs = nilfs_btree_node_dptrs(btree, right); 277 rdptrs = nilfs_btree_node_dptrs(right, btree);
314 rnchildren = nilfs_btree_node_get_nchildren(btree, right); 278 rnchildren = nilfs_btree_node_get_nchildren(right);
315 279
316 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 280 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
317 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); 281 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
@@ -320,8 +284,8 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
320 284
321 lnchildren += n; 285 lnchildren += n;
322 rnchildren -= n; 286 rnchildren -= n;
323 nilfs_btree_node_set_nchildren(btree, left, lnchildren); 287 nilfs_btree_node_set_nchildren(left, lnchildren);
324 nilfs_btree_node_set_nchildren(btree, right, rnchildren); 288 nilfs_btree_node_set_nchildren(right, rnchildren);
325} 289}
326 290
327/* Assume that the buffer heads corresponding to left and right are locked. */ 291/* Assume that the buffer heads corresponding to left and right are locked. */
@@ -334,13 +298,13 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
334 __le64 *ldptrs, *rdptrs; 298 __le64 *ldptrs, *rdptrs;
335 int lnchildren, rnchildren; 299 int lnchildren, rnchildren;
336 300
337 ldkeys = nilfs_btree_node_dkeys(btree, left); 301 ldkeys = nilfs_btree_node_dkeys(left);
338 ldptrs = nilfs_btree_node_dptrs(btree, left); 302 ldptrs = nilfs_btree_node_dptrs(left, btree);
339 lnchildren = nilfs_btree_node_get_nchildren(btree, left); 303 lnchildren = nilfs_btree_node_get_nchildren(left);
340 304
341 rdkeys = nilfs_btree_node_dkeys(btree, right); 305 rdkeys = nilfs_btree_node_dkeys(right);
342 rdptrs = nilfs_btree_node_dptrs(btree, right); 306 rdptrs = nilfs_btree_node_dptrs(right, btree);
343 rnchildren = nilfs_btree_node_get_nchildren(btree, right); 307 rnchildren = nilfs_btree_node_get_nchildren(right);
344 308
345 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); 309 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
346 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); 310 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
@@ -349,8 +313,8 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
349 313
350 lnchildren -= n; 314 lnchildren -= n;
351 rnchildren += n; 315 rnchildren += n;
352 nilfs_btree_node_set_nchildren(btree, left, lnchildren); 316 nilfs_btree_node_set_nchildren(left, lnchildren);
353 nilfs_btree_node_set_nchildren(btree, right, rnchildren); 317 nilfs_btree_node_set_nchildren(right, rnchildren);
354} 318}
355 319
356/* Assume that the buffer head corresponding to node is locked. */ 320/* Assume that the buffer head corresponding to node is locked. */
@@ -362,9 +326,9 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree,
362 __le64 *dptrs; 326 __le64 *dptrs;
363 int nchildren; 327 int nchildren;
364 328
365 dkeys = nilfs_btree_node_dkeys(btree, node); 329 dkeys = nilfs_btree_node_dkeys(node);
366 dptrs = nilfs_btree_node_dptrs(btree, node); 330 dptrs = nilfs_btree_node_dptrs(node, btree);
367 nchildren = nilfs_btree_node_get_nchildren(btree, node); 331 nchildren = nilfs_btree_node_get_nchildren(node);
368 if (index < nchildren) { 332 if (index < nchildren) {
369 memmove(dkeys + index + 1, dkeys + index, 333 memmove(dkeys + index + 1, dkeys + index,
370 (nchildren - index) * sizeof(*dkeys)); 334 (nchildren - index) * sizeof(*dkeys));
@@ -374,7 +338,7 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree,
374 dkeys[index] = nilfs_bmap_key_to_dkey(key); 338 dkeys[index] = nilfs_bmap_key_to_dkey(key);
375 dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr); 339 dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
376 nchildren++; 340 nchildren++;
377 nilfs_btree_node_set_nchildren(btree, node, nchildren); 341 nilfs_btree_node_set_nchildren(node, nchildren);
378} 342}
379 343
380/* Assume that the buffer head corresponding to node is locked. */ 344/* Assume that the buffer head corresponding to node is locked. */
@@ -388,11 +352,11 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree,
388 __le64 *dptrs; 352 __le64 *dptrs;
389 int nchildren; 353 int nchildren;
390 354
391 dkeys = nilfs_btree_node_dkeys(btree, node); 355 dkeys = nilfs_btree_node_dkeys(node);
392 dptrs = nilfs_btree_node_dptrs(btree, node); 356 dptrs = nilfs_btree_node_dptrs(node, btree);
393 key = nilfs_bmap_dkey_to_key(dkeys[index]); 357 key = nilfs_bmap_dkey_to_key(dkeys[index]);
394 ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]); 358 ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
395 nchildren = nilfs_btree_node_get_nchildren(btree, node); 359 nchildren = nilfs_btree_node_get_nchildren(node);
396 if (keyp != NULL) 360 if (keyp != NULL)
397 *keyp = key; 361 *keyp = key;
398 if (ptrp != NULL) 362 if (ptrp != NULL)
@@ -405,11 +369,10 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree,
405 (nchildren - index - 1) * sizeof(*dptrs)); 369 (nchildren - index - 1) * sizeof(*dptrs));
406 } 370 }
407 nchildren--; 371 nchildren--;
408 nilfs_btree_node_set_nchildren(btree, node, nchildren); 372 nilfs_btree_node_set_nchildren(node, nchildren);
409} 373}
410 374
411static int nilfs_btree_node_lookup(const struct nilfs_btree *btree, 375static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
412 const struct nilfs_btree_node *node,
413 __u64 key, int *indexp) 376 __u64 key, int *indexp)
414{ 377{
415 __u64 nkey; 378 __u64 nkey;
@@ -417,12 +380,12 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
417 380
418 /* binary search */ 381 /* binary search */
419 low = 0; 382 low = 0;
420 high = nilfs_btree_node_get_nchildren(btree, node) - 1; 383 high = nilfs_btree_node_get_nchildren(node) - 1;
421 index = 0; 384 index = 0;
422 s = 0; 385 s = 0;
423 while (low <= high) { 386 while (low <= high) {
424 index = (low + high) / 2; 387 index = (low + high) / 2;
425 nkey = nilfs_btree_node_get_key(btree, node, index); 388 nkey = nilfs_btree_node_get_key(node, index);
426 if (nkey == key) { 389 if (nkey == key) {
427 s = 0; 390 s = 0;
428 goto out; 391 goto out;
@@ -436,9 +399,8 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
436 } 399 }
437 400
438 /* adjust index */ 401 /* adjust index */
439 if (nilfs_btree_node_get_level(btree, node) > 402 if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
440 NILFS_BTREE_LEVEL_NODE_MIN) { 403 if (s > 0 && index > 0)
441 if ((s > 0) && (index > 0))
442 index--; 404 index--;
443 } else if (s < 0) 405 } else if (s < 0)
444 index++; 406 index++;
@@ -456,25 +418,20 @@ nilfs_btree_get_root(const struct nilfs_btree *btree)
456} 418}
457 419
458static inline struct nilfs_btree_node * 420static inline struct nilfs_btree_node *
459nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree, 421nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
460 const struct nilfs_btree_path *path,
461 int level)
462{ 422{
463 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 423 return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
464} 424}
465 425
466static inline struct nilfs_btree_node * 426static inline struct nilfs_btree_node *
467nilfs_btree_get_sib_node(const struct nilfs_btree *btree, 427nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
468 const struct nilfs_btree_path *path,
469 int level)
470{ 428{
471 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 429 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
472} 430}
473 431
474static inline int nilfs_btree_height(const struct nilfs_btree *btree) 432static inline int nilfs_btree_height(const struct nilfs_btree *btree)
475{ 433{
476 return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree)) 434 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
477 + 1;
478} 435}
479 436
480static inline struct nilfs_btree_node * 437static inline struct nilfs_btree_node *
@@ -484,7 +441,7 @@ nilfs_btree_get_node(const struct nilfs_btree *btree,
484{ 441{
485 return (level == nilfs_btree_height(btree) - 1) ? 442 return (level == nilfs_btree_height(btree) - 1) ?
486 nilfs_btree_get_root(btree) : 443 nilfs_btree_get_root(btree) :
487 nilfs_btree_get_nonroot_node(btree, path, level); 444 nilfs_btree_get_nonroot_node(path, level);
488} 445}
489 446
490static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, 447static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
@@ -496,12 +453,11 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
496 int level, index, found, ret; 453 int level, index, found, ret;
497 454
498 node = nilfs_btree_get_root(btree); 455 node = nilfs_btree_get_root(btree);
499 level = nilfs_btree_node_get_level(btree, node); 456 level = nilfs_btree_node_get_level(node);
500 if ((level < minlevel) || 457 if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
501 (nilfs_btree_node_get_nchildren(btree, node) <= 0))
502 return -ENOENT; 458 return -ENOENT;
503 459
504 found = nilfs_btree_node_lookup(btree, node, key, &index); 460 found = nilfs_btree_node_lookup(node, key, &index);
505 ptr = nilfs_btree_node_get_ptr(btree, node, index); 461 ptr = nilfs_btree_node_get_ptr(btree, node, index);
506 path[level].bp_bh = NULL; 462 path[level].bp_bh = NULL;
507 path[level].bp_index = index; 463 path[level].bp_index = index;
@@ -510,14 +466,13 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
510 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 466 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
511 if (ret < 0) 467 if (ret < 0)
512 return ret; 468 return ret;
513 node = nilfs_btree_get_nonroot_node(btree, path, level); 469 node = nilfs_btree_get_nonroot_node(path, level);
514 BUG_ON(level != nilfs_btree_node_get_level(btree, node)); 470 BUG_ON(level != nilfs_btree_node_get_level(node));
515 if (!found) 471 if (!found)
516 found = nilfs_btree_node_lookup(btree, node, key, 472 found = nilfs_btree_node_lookup(node, key, &index);
517 &index);
518 else 473 else
519 index = 0; 474 index = 0;
520 if (index < nilfs_btree_node_nchildren_max(btree, node)) 475 if (index < nilfs_btree_node_nchildren_max(node, btree))
521 ptr = nilfs_btree_node_get_ptr(btree, node, index); 476 ptr = nilfs_btree_node_get_ptr(btree, node, index);
522 else { 477 else {
523 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 478 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
@@ -544,10 +499,10 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
544 int index, level, ret; 499 int index, level, ret;
545 500
546 node = nilfs_btree_get_root(btree); 501 node = nilfs_btree_get_root(btree);
547 index = nilfs_btree_node_get_nchildren(btree, node) - 1; 502 index = nilfs_btree_node_get_nchildren(node) - 1;
548 if (index < 0) 503 if (index < 0)
549 return -ENOENT; 504 return -ENOENT;
550 level = nilfs_btree_node_get_level(btree, node); 505 level = nilfs_btree_node_get_level(node);
551 ptr = nilfs_btree_node_get_ptr(btree, node, index); 506 ptr = nilfs_btree_node_get_ptr(btree, node, index);
552 path[level].bp_bh = NULL; 507 path[level].bp_bh = NULL;
553 path[level].bp_index = index; 508 path[level].bp_index = index;
@@ -556,15 +511,15 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
556 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 511 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
557 if (ret < 0) 512 if (ret < 0)
558 return ret; 513 return ret;
559 node = nilfs_btree_get_nonroot_node(btree, path, level); 514 node = nilfs_btree_get_nonroot_node(path, level);
560 BUG_ON(level != nilfs_btree_node_get_level(btree, node)); 515 BUG_ON(level != nilfs_btree_node_get_level(node));
561 index = nilfs_btree_node_get_nchildren(btree, node) - 1; 516 index = nilfs_btree_node_get_nchildren(node) - 1;
562 ptr = nilfs_btree_node_get_ptr(btree, node, index); 517 ptr = nilfs_btree_node_get_ptr(btree, node, index);
563 path[level].bp_index = index; 518 path[level].bp_index = index;
564 } 519 }
565 520
566 if (keyp != NULL) 521 if (keyp != NULL)
567 *keyp = nilfs_btree_node_get_key(btree, node, index); 522 *keyp = nilfs_btree_node_get_key(node, index);
568 if (ptrp != NULL) 523 if (ptrp != NULL)
569 *ptrp = ptr; 524 *ptrp = ptr;
570 525
@@ -580,18 +535,18 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
580 int ret; 535 int ret;
581 536
582 btree = (struct nilfs_btree *)bmap; 537 btree = (struct nilfs_btree *)bmap;
583 path = nilfs_btree_alloc_path(btree); 538 path = nilfs_btree_alloc_path();
584 if (path == NULL) 539 if (path == NULL)
585 return -ENOMEM; 540 return -ENOMEM;
586 nilfs_btree_init_path(btree, path); 541 nilfs_btree_init_path(path);
587 542
588 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); 543 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
589 544
590 if (ptrp != NULL) 545 if (ptrp != NULL)
591 *ptrp = ptr; 546 *ptrp = ptr;
592 547
593 nilfs_btree_clear_path(btree, path); 548 nilfs_btree_release_path(path);
594 nilfs_btree_free_path(btree, path); 549 nilfs_btree_free_path(path);
595 550
596 return ret; 551 return ret;
597} 552}
@@ -608,10 +563,10 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
608 int level = NILFS_BTREE_LEVEL_NODE_MIN; 563 int level = NILFS_BTREE_LEVEL_NODE_MIN;
609 int ret, cnt, index, maxlevel; 564 int ret, cnt, index, maxlevel;
610 565
611 path = nilfs_btree_alloc_path(btree); 566 path = nilfs_btree_alloc_path();
612 if (path == NULL) 567 if (path == NULL)
613 return -ENOMEM; 568 return -ENOMEM;
614 nilfs_btree_init_path(btree, path); 569 nilfs_btree_init_path(path);
615 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); 570 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
616 if (ret < 0) 571 if (ret < 0)
617 goto out; 572 goto out;
@@ -631,8 +586,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
631 node = nilfs_btree_get_node(btree, path, level); 586 node = nilfs_btree_get_node(btree, path, level);
632 index = path[level].bp_index + 1; 587 index = path[level].bp_index + 1;
633 for (;;) { 588 for (;;) {
634 while (index < nilfs_btree_node_get_nchildren(btree, node)) { 589 while (index < nilfs_btree_node_get_nchildren(node)) {
635 if (nilfs_btree_node_get_key(btree, node, index) != 590 if (nilfs_btree_node_get_key(node, index) !=
636 key + cnt) 591 key + cnt)
637 goto end; 592 goto end;
638 ptr2 = nilfs_btree_node_get_ptr(btree, node, index); 593 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
@@ -653,8 +608,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
653 /* look-up right sibling node */ 608 /* look-up right sibling node */
654 node = nilfs_btree_get_node(btree, path, level + 1); 609 node = nilfs_btree_get_node(btree, path, level + 1);
655 index = path[level + 1].bp_index + 1; 610 index = path[level + 1].bp_index + 1;
656 if (index >= nilfs_btree_node_get_nchildren(btree, node) || 611 if (index >= nilfs_btree_node_get_nchildren(node) ||
657 nilfs_btree_node_get_key(btree, node, index) != key + cnt) 612 nilfs_btree_node_get_key(node, index) != key + cnt)
658 break; 613 break;
659 ptr2 = nilfs_btree_node_get_ptr(btree, node, index); 614 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
660 path[level + 1].bp_index = index; 615 path[level + 1].bp_index = index;
@@ -664,7 +619,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
664 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh); 619 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
665 if (ret < 0) 620 if (ret < 0)
666 goto out; 621 goto out;
667 node = nilfs_btree_get_nonroot_node(btree, path, level); 622 node = nilfs_btree_get_nonroot_node(path, level);
668 index = 0; 623 index = 0;
669 path[level].bp_index = index; 624 path[level].bp_index = index;
670 } 625 }
@@ -672,8 +627,8 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
672 *ptrp = ptr; 627 *ptrp = ptr;
673 ret = cnt; 628 ret = cnt;
674 out: 629 out:
675 nilfs_btree_clear_path(btree, path); 630 nilfs_btree_release_path(path);
676 nilfs_btree_free_path(btree, path); 631 nilfs_btree_free_path(path);
677 return ret; 632 return ret;
678} 633}
679 634
@@ -685,9 +640,7 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
685 do { 640 do {
686 lock_buffer(path[level].bp_bh); 641 lock_buffer(path[level].bp_bh);
687 nilfs_btree_node_set_key( 642 nilfs_btree_node_set_key(
688 btree, 643 nilfs_btree_get_nonroot_node(path, level),
689 nilfs_btree_get_nonroot_node(
690 btree, path, level),
691 path[level].bp_index, key); 644 path[level].bp_index, key);
692 if (!buffer_dirty(path[level].bp_bh)) 645 if (!buffer_dirty(path[level].bp_bh))
693 nilfs_btnode_mark_dirty(path[level].bp_bh); 646 nilfs_btnode_mark_dirty(path[level].bp_bh);
@@ -698,8 +651,7 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree,
698 651
699 /* root */ 652 /* root */
700 if (level == nilfs_btree_height(btree) - 1) { 653 if (level == nilfs_btree_height(btree) - 1) {
701 nilfs_btree_node_set_key(btree, 654 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
702 nilfs_btree_get_root(btree),
703 path[level].bp_index, key); 655 path[level].bp_index, key);
704 } 656 }
705} 657}
@@ -712,7 +664,7 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
712 664
713 if (level < nilfs_btree_height(btree) - 1) { 665 if (level < nilfs_btree_height(btree) - 1) {
714 lock_buffer(path[level].bp_bh); 666 lock_buffer(path[level].bp_bh);
715 node = nilfs_btree_get_nonroot_node(btree, path, level); 667 node = nilfs_btree_get_nonroot_node(path, level);
716 nilfs_btree_node_insert(btree, node, *keyp, *ptrp, 668 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
717 path[level].bp_index); 669 path[level].bp_index);
718 if (!buffer_dirty(path[level].bp_bh)) 670 if (!buffer_dirty(path[level].bp_bh))
@@ -721,8 +673,8 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree,
721 673
722 if (path[level].bp_index == 0) 674 if (path[level].bp_index == 0)
723 nilfs_btree_promote_key(btree, path, level + 1, 675 nilfs_btree_promote_key(btree, path, level + 1,
724 nilfs_btree_node_get_key( 676 nilfs_btree_node_get_key(node,
725 btree, node, 0)); 677 0));
726 } else { 678 } else {
727 node = nilfs_btree_get_root(btree); 679 node = nilfs_btree_get_root(btree);
728 nilfs_btree_node_insert(btree, node, *keyp, *ptrp, 680 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
@@ -740,10 +692,10 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
740 lock_buffer(path[level].bp_bh); 692 lock_buffer(path[level].bp_bh);
741 lock_buffer(path[level].bp_sib_bh); 693 lock_buffer(path[level].bp_sib_bh);
742 694
743 node = nilfs_btree_get_nonroot_node(btree, path, level); 695 node = nilfs_btree_get_nonroot_node(path, level);
744 left = nilfs_btree_get_sib_node(btree, path, level); 696 left = nilfs_btree_get_sib_node(path, level);
745 nchildren = nilfs_btree_node_get_nchildren(btree, node); 697 nchildren = nilfs_btree_node_get_nchildren(node);
746 lnchildren = nilfs_btree_node_get_nchildren(btree, left); 698 lnchildren = nilfs_btree_node_get_nchildren(left);
747 move = 0; 699 move = 0;
748 700
749 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 701 n = (nchildren + lnchildren + 1) / 2 - lnchildren;
@@ -764,7 +716,7 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
764 unlock_buffer(path[level].bp_sib_bh); 716 unlock_buffer(path[level].bp_sib_bh);
765 717
766 nilfs_btree_promote_key(btree, path, level + 1, 718 nilfs_btree_promote_key(btree, path, level + 1,
767 nilfs_btree_node_get_key(btree, node, 0)); 719 nilfs_btree_node_get_key(node, 0));
768 720
769 if (move) { 721 if (move) {
770 brelse(path[level].bp_bh); 722 brelse(path[level].bp_bh);
@@ -791,10 +743,10 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
791 lock_buffer(path[level].bp_bh); 743 lock_buffer(path[level].bp_bh);
792 lock_buffer(path[level].bp_sib_bh); 744 lock_buffer(path[level].bp_sib_bh);
793 745
794 node = nilfs_btree_get_nonroot_node(btree, path, level); 746 node = nilfs_btree_get_nonroot_node(path, level);
795 right = nilfs_btree_get_sib_node(btree, path, level); 747 right = nilfs_btree_get_sib_node(path, level);
796 nchildren = nilfs_btree_node_get_nchildren(btree, node); 748 nchildren = nilfs_btree_node_get_nchildren(node);
797 rnchildren = nilfs_btree_node_get_nchildren(btree, right); 749 rnchildren = nilfs_btree_node_get_nchildren(right);
798 move = 0; 750 move = 0;
799 751
800 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 752 n = (nchildren + rnchildren + 1) / 2 - rnchildren;
@@ -816,15 +768,14 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
816 768
817 path[level + 1].bp_index++; 769 path[level + 1].bp_index++;
818 nilfs_btree_promote_key(btree, path, level + 1, 770 nilfs_btree_promote_key(btree, path, level + 1,
819 nilfs_btree_node_get_key(btree, right, 0)); 771 nilfs_btree_node_get_key(right, 0));
820 path[level + 1].bp_index--; 772 path[level + 1].bp_index--;
821 773
822 if (move) { 774 if (move) {
823 brelse(path[level].bp_bh); 775 brelse(path[level].bp_bh);
824 path[level].bp_bh = path[level].bp_sib_bh; 776 path[level].bp_bh = path[level].bp_sib_bh;
825 path[level].bp_sib_bh = NULL; 777 path[level].bp_sib_bh = NULL;
826 path[level].bp_index -= 778 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
827 nilfs_btree_node_get_nchildren(btree, node);
828 path[level + 1].bp_index++; 779 path[level + 1].bp_index++;
829 } else { 780 } else {
830 brelse(path[level].bp_sib_bh); 781 brelse(path[level].bp_sib_bh);
@@ -846,9 +797,9 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
846 lock_buffer(path[level].bp_bh); 797 lock_buffer(path[level].bp_bh);
847 lock_buffer(path[level].bp_sib_bh); 798 lock_buffer(path[level].bp_sib_bh);
848 799
849 node = nilfs_btree_get_nonroot_node(btree, path, level); 800 node = nilfs_btree_get_nonroot_node(path, level);
850 right = nilfs_btree_get_sib_node(btree, path, level); 801 right = nilfs_btree_get_sib_node(path, level);
851 nchildren = nilfs_btree_node_get_nchildren(btree, node); 802 nchildren = nilfs_btree_node_get_nchildren(node);
852 move = 0; 803 move = 0;
853 804
854 n = (nchildren + 1) / 2; 805 n = (nchildren + 1) / 2;
@@ -867,16 +818,15 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
867 unlock_buffer(path[level].bp_bh); 818 unlock_buffer(path[level].bp_bh);
868 unlock_buffer(path[level].bp_sib_bh); 819 unlock_buffer(path[level].bp_sib_bh);
869 820
870 newkey = nilfs_btree_node_get_key(btree, right, 0); 821 newkey = nilfs_btree_node_get_key(right, 0);
871 newptr = path[level].bp_newreq.bpr_ptr; 822 newptr = path[level].bp_newreq.bpr_ptr;
872 823
873 if (move) { 824 if (move) {
874 path[level].bp_index -= 825 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
875 nilfs_btree_node_get_nchildren(btree, node);
876 nilfs_btree_node_insert(btree, right, *keyp, *ptrp, 826 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
877 path[level].bp_index); 827 path[level].bp_index);
878 828
879 *keyp = nilfs_btree_node_get_key(btree, right, 0); 829 *keyp = nilfs_btree_node_get_key(right, 0);
880 *ptrp = path[level].bp_newreq.bpr_ptr; 830 *ptrp = path[level].bp_newreq.bpr_ptr;
881 831
882 brelse(path[level].bp_bh); 832 brelse(path[level].bp_bh);
@@ -885,7 +835,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
885 } else { 835 } else {
886 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 836 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
887 837
888 *keyp = nilfs_btree_node_get_key(btree, right, 0); 838 *keyp = nilfs_btree_node_get_key(right, 0);
889 *ptrp = path[level].bp_newreq.bpr_ptr; 839 *ptrp = path[level].bp_newreq.bpr_ptr;
890 840
891 brelse(path[level].bp_sib_bh); 841 brelse(path[level].bp_sib_bh);
@@ -905,12 +855,12 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
905 lock_buffer(path[level].bp_sib_bh); 855 lock_buffer(path[level].bp_sib_bh);
906 856
907 root = nilfs_btree_get_root(btree); 857 root = nilfs_btree_get_root(btree);
908 child = nilfs_btree_get_sib_node(btree, path, level); 858 child = nilfs_btree_get_sib_node(path, level);
909 859
910 n = nilfs_btree_node_get_nchildren(btree, root); 860 n = nilfs_btree_node_get_nchildren(root);
911 861
912 nilfs_btree_node_move_right(btree, root, child, n); 862 nilfs_btree_node_move_right(btree, root, child, n);
913 nilfs_btree_node_set_level(btree, root, level + 1); 863 nilfs_btree_node_set_level(root, level + 1);
914 864
915 if (!buffer_dirty(path[level].bp_sib_bh)) 865 if (!buffer_dirty(path[level].bp_sib_bh))
916 nilfs_btnode_mark_dirty(path[level].bp_sib_bh); 866 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
@@ -922,7 +872,7 @@ static void nilfs_btree_grow(struct nilfs_btree *btree,
922 872
923 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 873 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
924 874
925 *keyp = nilfs_btree_node_get_key(btree, child, 0); 875 *keyp = nilfs_btree_node_get_key(child, 0);
926 *ptrp = path[level].bp_newreq.bpr_ptr; 876 *ptrp = path[level].bp_newreq.bpr_ptr;
927} 877}
928 878
@@ -990,26 +940,29 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
990 struct nilfs_btree_node *node, *parent, *sib; 940 struct nilfs_btree_node *node, *parent, *sib;
991 __u64 sibptr; 941 __u64 sibptr;
992 int pindex, level, ret; 942 int pindex, level, ret;
943 struct inode *dat = NULL;
993 944
994 stats->bs_nblocks = 0; 945 stats->bs_nblocks = 0;
995 level = NILFS_BTREE_LEVEL_DATA; 946 level = NILFS_BTREE_LEVEL_DATA;
996 947
997 /* allocate a new ptr for data block */ 948 /* allocate a new ptr for data block */
998 if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) 949 if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
999 path[level].bp_newreq.bpr_ptr = 950 path[level].bp_newreq.bpr_ptr =
1000 nilfs_btree_find_target_v(btree, path, key); 951 nilfs_btree_find_target_v(btree, path, key);
952 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
953 }
1001 954
1002 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, 955 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1003 &path[level].bp_newreq); 956 &path[level].bp_newreq, dat);
1004 if (ret < 0) 957 if (ret < 0)
1005 goto err_out_data; 958 goto err_out_data;
1006 959
1007 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 960 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1008 level < nilfs_btree_height(btree) - 1; 961 level < nilfs_btree_height(btree) - 1;
1009 level++) { 962 level++) {
1010 node = nilfs_btree_get_nonroot_node(btree, path, level); 963 node = nilfs_btree_get_nonroot_node(path, level);
1011 if (nilfs_btree_node_get_nchildren(btree, node) < 964 if (nilfs_btree_node_get_nchildren(node) <
1012 nilfs_btree_node_nchildren_max(btree, node)) { 965 nilfs_btree_node_nchildren_max(node, btree)) {
1013 path[level].bp_op = nilfs_btree_do_insert; 966 path[level].bp_op = nilfs_btree_do_insert;
1014 stats->bs_nblocks++; 967 stats->bs_nblocks++;
1015 goto out; 968 goto out;
@@ -1026,8 +979,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1026 if (ret < 0) 979 if (ret < 0)
1027 goto err_out_child_node; 980 goto err_out_child_node;
1028 sib = (struct nilfs_btree_node *)bh->b_data; 981 sib = (struct nilfs_btree_node *)bh->b_data;
1029 if (nilfs_btree_node_get_nchildren(btree, sib) < 982 if (nilfs_btree_node_get_nchildren(sib) <
1030 nilfs_btree_node_nchildren_max(btree, sib)) { 983 nilfs_btree_node_nchildren_max(sib, btree)) {
1031 path[level].bp_sib_bh = bh; 984 path[level].bp_sib_bh = bh;
1032 path[level].bp_op = nilfs_btree_carry_left; 985 path[level].bp_op = nilfs_btree_carry_left;
1033 stats->bs_nblocks++; 986 stats->bs_nblocks++;
@@ -1038,15 +991,15 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1038 991
1039 /* right sibling */ 992 /* right sibling */
1040 if (pindex < 993 if (pindex <
1041 nilfs_btree_node_get_nchildren(btree, parent) - 1) { 994 nilfs_btree_node_get_nchildren(parent) - 1) {
1042 sibptr = nilfs_btree_node_get_ptr(btree, parent, 995 sibptr = nilfs_btree_node_get_ptr(btree, parent,
1043 pindex + 1); 996 pindex + 1);
1044 ret = nilfs_btree_get_block(btree, sibptr, &bh); 997 ret = nilfs_btree_get_block(btree, sibptr, &bh);
1045 if (ret < 0) 998 if (ret < 0)
1046 goto err_out_child_node; 999 goto err_out_child_node;
1047 sib = (struct nilfs_btree_node *)bh->b_data; 1000 sib = (struct nilfs_btree_node *)bh->b_data;
1048 if (nilfs_btree_node_get_nchildren(btree, sib) < 1001 if (nilfs_btree_node_get_nchildren(sib) <
1049 nilfs_btree_node_nchildren_max(btree, sib)) { 1002 nilfs_btree_node_nchildren_max(sib, btree)) {
1050 path[level].bp_sib_bh = bh; 1003 path[level].bp_sib_bh = bh;
1051 path[level].bp_op = nilfs_btree_carry_right; 1004 path[level].bp_op = nilfs_btree_carry_right;
1052 stats->bs_nblocks++; 1005 stats->bs_nblocks++;
@@ -1059,7 +1012,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1059 path[level].bp_newreq.bpr_ptr = 1012 path[level].bp_newreq.bpr_ptr =
1060 path[level - 1].bp_newreq.bpr_ptr + 1; 1013 path[level - 1].bp_newreq.bpr_ptr + 1;
1061 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, 1014 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1062 &path[level].bp_newreq); 1015 &path[level].bp_newreq, dat);
1063 if (ret < 0) 1016 if (ret < 0)
1064 goto err_out_child_node; 1017 goto err_out_child_node;
1065 ret = nilfs_btree_get_new_block(btree, 1018 ret = nilfs_btree_get_new_block(btree,
@@ -1081,8 +1034,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1081 1034
1082 /* root */ 1035 /* root */
1083 node = nilfs_btree_get_root(btree); 1036 node = nilfs_btree_get_root(btree);
1084 if (nilfs_btree_node_get_nchildren(btree, node) < 1037 if (nilfs_btree_node_get_nchildren(node) <
1085 nilfs_btree_node_nchildren_max(btree, node)) { 1038 nilfs_btree_node_nchildren_max(node, btree)) {
1086 path[level].bp_op = nilfs_btree_do_insert; 1039 path[level].bp_op = nilfs_btree_do_insert;
1087 stats->bs_nblocks++; 1040 stats->bs_nblocks++;
1088 goto out; 1041 goto out;
@@ -1091,7 +1044,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1091 /* grow */ 1044 /* grow */
1092 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1045 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1093 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, 1046 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1094 &path[level].bp_newreq); 1047 &path[level].bp_newreq, dat);
1095 if (ret < 0) 1048 if (ret < 0)
1096 goto err_out_child_node; 1049 goto err_out_child_node;
1097 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1050 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
@@ -1119,16 +1072,18 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
1119 1072
1120 /* error */ 1073 /* error */
1121 err_out_curr_node: 1074 err_out_curr_node:
1122 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq); 1075 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1076 dat);
1123 err_out_child_node: 1077 err_out_child_node:
1124 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1078 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1125 nilfs_btnode_delete(path[level].bp_sib_bh); 1079 nilfs_btnode_delete(path[level].bp_sib_bh);
1126 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, 1080 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1127 &path[level].bp_newreq); 1081 &path[level].bp_newreq, dat);
1128 1082
1129 } 1083 }
1130 1084
1131 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq); 1085 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1086 dat);
1132 err_out_data: 1087 err_out_data:
1133 *levelp = level; 1088 *levelp = level;
1134 stats->bs_nblocks = 0; 1089 stats->bs_nblocks = 0;
@@ -1139,16 +1094,19 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1139 struct nilfs_btree_path *path, 1094 struct nilfs_btree_path *path,
1140 int maxlevel, __u64 key, __u64 ptr) 1095 int maxlevel, __u64 key, __u64 ptr)
1141{ 1096{
1097 struct inode *dat = NULL;
1142 int level; 1098 int level;
1143 1099
1144 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1100 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1145 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1101 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1146 if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) 1102 if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1147 nilfs_btree_set_target_v(btree, key, ptr); 1103 nilfs_btree_set_target_v(btree, key, ptr);
1104 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1105 }
1148 1106
1149 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1107 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1150 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap, 1108 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1151 &path[level - 1].bp_newreq); 1109 &path[level - 1].bp_newreq, dat);
1152 path[level].bp_op(btree, path, level, &key, &ptr); 1110 path[level].bp_op(btree, path, level, &key, &ptr);
1153 } 1111 }
1154 1112
@@ -1164,10 +1122,10 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1164 int level, ret; 1122 int level, ret;
1165 1123
1166 btree = (struct nilfs_btree *)bmap; 1124 btree = (struct nilfs_btree *)bmap;
1167 path = nilfs_btree_alloc_path(btree); 1125 path = nilfs_btree_alloc_path();
1168 if (path == NULL) 1126 if (path == NULL)
1169 return -ENOMEM; 1127 return -ENOMEM;
1170 nilfs_btree_init_path(btree, path); 1128 nilfs_btree_init_path(path);
1171 1129
1172 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1130 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1173 NILFS_BTREE_LEVEL_NODE_MIN); 1131 NILFS_BTREE_LEVEL_NODE_MIN);
@@ -1184,8 +1142,8 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1184 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); 1142 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1185 1143
1186 out: 1144 out:
1187 nilfs_btree_clear_path(btree, path); 1145 nilfs_btree_release_path(path);
1188 nilfs_btree_free_path(btree, path); 1146 nilfs_btree_free_path(path);
1189 return ret; 1147 return ret;
1190} 1148}
1191 1149
@@ -1197,7 +1155,7 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1197 1155
1198 if (level < nilfs_btree_height(btree) - 1) { 1156 if (level < nilfs_btree_height(btree) - 1) {
1199 lock_buffer(path[level].bp_bh); 1157 lock_buffer(path[level].bp_bh);
1200 node = nilfs_btree_get_nonroot_node(btree, path, level); 1158 node = nilfs_btree_get_nonroot_node(path, level);
1201 nilfs_btree_node_delete(btree, node, keyp, ptrp, 1159 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1202 path[level].bp_index); 1160 path[level].bp_index);
1203 if (!buffer_dirty(path[level].bp_bh)) 1161 if (!buffer_dirty(path[level].bp_bh))
@@ -1205,7 +1163,7 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1205 unlock_buffer(path[level].bp_bh); 1163 unlock_buffer(path[level].bp_bh);
1206 if (path[level].bp_index == 0) 1164 if (path[level].bp_index == 0)
1207 nilfs_btree_promote_key(btree, path, level + 1, 1165 nilfs_btree_promote_key(btree, path, level + 1,
1208 nilfs_btree_node_get_key(btree, node, 0)); 1166 nilfs_btree_node_get_key(node, 0));
1209 } else { 1167 } else {
1210 node = nilfs_btree_get_root(btree); 1168 node = nilfs_btree_get_root(btree);
1211 nilfs_btree_node_delete(btree, node, keyp, ptrp, 1169 nilfs_btree_node_delete(btree, node, keyp, ptrp,
@@ -1225,10 +1183,10 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1225 lock_buffer(path[level].bp_bh); 1183 lock_buffer(path[level].bp_bh);
1226 lock_buffer(path[level].bp_sib_bh); 1184 lock_buffer(path[level].bp_sib_bh);
1227 1185
1228 node = nilfs_btree_get_nonroot_node(btree, path, level); 1186 node = nilfs_btree_get_nonroot_node(path, level);
1229 left = nilfs_btree_get_sib_node(btree, path, level); 1187 left = nilfs_btree_get_sib_node(path, level);
1230 nchildren = nilfs_btree_node_get_nchildren(btree, node); 1188 nchildren = nilfs_btree_node_get_nchildren(node);
1231 lnchildren = nilfs_btree_node_get_nchildren(btree, left); 1189 lnchildren = nilfs_btree_node_get_nchildren(left);
1232 1190
1233 n = (nchildren + lnchildren) / 2 - nchildren; 1191 n = (nchildren + lnchildren) / 2 - nchildren;
1234 1192
@@ -1243,7 +1201,7 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1243 unlock_buffer(path[level].bp_sib_bh); 1201 unlock_buffer(path[level].bp_sib_bh);
1244 1202
1245 nilfs_btree_promote_key(btree, path, level + 1, 1203 nilfs_btree_promote_key(btree, path, level + 1,
1246 nilfs_btree_node_get_key(btree, node, 0)); 1204 nilfs_btree_node_get_key(node, 0));
1247 1205
1248 brelse(path[level].bp_sib_bh); 1206 brelse(path[level].bp_sib_bh);
1249 path[level].bp_sib_bh = NULL; 1207 path[level].bp_sib_bh = NULL;
@@ -1262,10 +1220,10 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1262 lock_buffer(path[level].bp_bh); 1220 lock_buffer(path[level].bp_bh);
1263 lock_buffer(path[level].bp_sib_bh); 1221 lock_buffer(path[level].bp_sib_bh);
1264 1222
1265 node = nilfs_btree_get_nonroot_node(btree, path, level); 1223 node = nilfs_btree_get_nonroot_node(path, level);
1266 right = nilfs_btree_get_sib_node(btree, path, level); 1224 right = nilfs_btree_get_sib_node(path, level);
1267 nchildren = nilfs_btree_node_get_nchildren(btree, node); 1225 nchildren = nilfs_btree_node_get_nchildren(node);
1268 rnchildren = nilfs_btree_node_get_nchildren(btree, right); 1226 rnchildren = nilfs_btree_node_get_nchildren(right);
1269 1227
1270 n = (nchildren + rnchildren) / 2 - nchildren; 1228 n = (nchildren + rnchildren) / 2 - nchildren;
1271 1229
@@ -1281,7 +1239,7 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1281 1239
1282 path[level + 1].bp_index++; 1240 path[level + 1].bp_index++;
1283 nilfs_btree_promote_key(btree, path, level + 1, 1241 nilfs_btree_promote_key(btree, path, level + 1,
1284 nilfs_btree_node_get_key(btree, right, 0)); 1242 nilfs_btree_node_get_key(right, 0));
1285 path[level + 1].bp_index--; 1243 path[level + 1].bp_index--;
1286 1244
1287 brelse(path[level].bp_sib_bh); 1245 brelse(path[level].bp_sib_bh);
@@ -1300,10 +1258,10 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1300 lock_buffer(path[level].bp_bh); 1258 lock_buffer(path[level].bp_bh);
1301 lock_buffer(path[level].bp_sib_bh); 1259 lock_buffer(path[level].bp_sib_bh);
1302 1260
1303 node = nilfs_btree_get_nonroot_node(btree, path, level); 1261 node = nilfs_btree_get_nonroot_node(path, level);
1304 left = nilfs_btree_get_sib_node(btree, path, level); 1262 left = nilfs_btree_get_sib_node(path, level);
1305 1263
1306 n = nilfs_btree_node_get_nchildren(btree, node); 1264 n = nilfs_btree_node_get_nchildren(node);
1307 1265
1308 nilfs_btree_node_move_left(btree, left, node, n); 1266 nilfs_btree_node_move_left(btree, left, node, n);
1309 1267
@@ -1316,7 +1274,7 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1316 nilfs_btnode_delete(path[level].bp_bh); 1274 nilfs_btnode_delete(path[level].bp_bh);
1317 path[level].bp_bh = path[level].bp_sib_bh; 1275 path[level].bp_bh = path[level].bp_sib_bh;
1318 path[level].bp_sib_bh = NULL; 1276 path[level].bp_sib_bh = NULL;
1319 path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left); 1277 path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1320} 1278}
1321 1279
1322static void nilfs_btree_concat_right(struct nilfs_btree *btree, 1280static void nilfs_btree_concat_right(struct nilfs_btree *btree,
@@ -1331,10 +1289,10 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1331 lock_buffer(path[level].bp_bh); 1289 lock_buffer(path[level].bp_bh);
1332 lock_buffer(path[level].bp_sib_bh); 1290 lock_buffer(path[level].bp_sib_bh);
1333 1291
1334 node = nilfs_btree_get_nonroot_node(btree, path, level); 1292 node = nilfs_btree_get_nonroot_node(path, level);
1335 right = nilfs_btree_get_sib_node(btree, path, level); 1293 right = nilfs_btree_get_sib_node(path, level);
1336 1294
1337 n = nilfs_btree_node_get_nchildren(btree, right); 1295 n = nilfs_btree_node_get_nchildren(right);
1338 1296
1339 nilfs_btree_node_move_left(btree, node, right, n); 1297 nilfs_btree_node_move_left(btree, node, right, n);
1340 1298
@@ -1360,11 +1318,11 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
1360 1318
1361 lock_buffer(path[level].bp_bh); 1319 lock_buffer(path[level].bp_bh);
1362 root = nilfs_btree_get_root(btree); 1320 root = nilfs_btree_get_root(btree);
1363 child = nilfs_btree_get_nonroot_node(btree, path, level); 1321 child = nilfs_btree_get_nonroot_node(path, level);
1364 1322
1365 nilfs_btree_node_delete(btree, root, NULL, NULL, 0); 1323 nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1366 nilfs_btree_node_set_level(btree, root, level); 1324 nilfs_btree_node_set_level(root, level);
1367 n = nilfs_btree_node_get_nchildren(btree, child); 1325 n = nilfs_btree_node_get_nchildren(child);
1368 nilfs_btree_node_move_left(btree, root, child, n); 1326 nilfs_btree_node_move_left(btree, root, child, n);
1369 unlock_buffer(path[level].bp_bh); 1327 unlock_buffer(path[level].bp_bh);
1370 1328
@@ -1376,7 +1334,8 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
1376static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, 1334static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1377 struct nilfs_btree_path *path, 1335 struct nilfs_btree_path *path,
1378 int *levelp, 1336 int *levelp,
1379 struct nilfs_bmap_stats *stats) 1337 struct nilfs_bmap_stats *stats,
1338 struct inode *dat)
1380{ 1339{
1381 struct buffer_head *bh; 1340 struct buffer_head *bh;
1382 struct nilfs_btree_node *node, *parent, *sib; 1341 struct nilfs_btree_node *node, *parent, *sib;
@@ -1388,17 +1347,17 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1388 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1347 for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1389 level < nilfs_btree_height(btree) - 1; 1348 level < nilfs_btree_height(btree) - 1;
1390 level++) { 1349 level++) {
1391 node = nilfs_btree_get_nonroot_node(btree, path, level); 1350 node = nilfs_btree_get_nonroot_node(path, level);
1392 path[level].bp_oldreq.bpr_ptr = 1351 path[level].bp_oldreq.bpr_ptr =
1393 nilfs_btree_node_get_ptr(btree, node, 1352 nilfs_btree_node_get_ptr(btree, node,
1394 path[level].bp_index); 1353 path[level].bp_index);
1395 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, 1354 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1396 &path[level].bp_oldreq); 1355 &path[level].bp_oldreq, dat);
1397 if (ret < 0) 1356 if (ret < 0)
1398 goto err_out_child_node; 1357 goto err_out_child_node;
1399 1358
1400 if (nilfs_btree_node_get_nchildren(btree, node) > 1359 if (nilfs_btree_node_get_nchildren(node) >
1401 nilfs_btree_node_nchildren_min(btree, node)) { 1360 nilfs_btree_node_nchildren_min(node, btree)) {
1402 path[level].bp_op = nilfs_btree_do_delete; 1361 path[level].bp_op = nilfs_btree_do_delete;
1403 stats->bs_nblocks++; 1362 stats->bs_nblocks++;
1404 goto out; 1363 goto out;
@@ -1415,8 +1374,8 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1415 if (ret < 0) 1374 if (ret < 0)
1416 goto err_out_curr_node; 1375 goto err_out_curr_node;
1417 sib = (struct nilfs_btree_node *)bh->b_data; 1376 sib = (struct nilfs_btree_node *)bh->b_data;
1418 if (nilfs_btree_node_get_nchildren(btree, sib) > 1377 if (nilfs_btree_node_get_nchildren(sib) >
1419 nilfs_btree_node_nchildren_min(btree, sib)) { 1378 nilfs_btree_node_nchildren_min(sib, btree)) {
1420 path[level].bp_sib_bh = bh; 1379 path[level].bp_sib_bh = bh;
1421 path[level].bp_op = nilfs_btree_borrow_left; 1380 path[level].bp_op = nilfs_btree_borrow_left;
1422 stats->bs_nblocks++; 1381 stats->bs_nblocks++;
@@ -1428,7 +1387,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1428 /* continue; */ 1387 /* continue; */
1429 } 1388 }
1430 } else if (pindex < 1389 } else if (pindex <
1431 nilfs_btree_node_get_nchildren(btree, parent) - 1) { 1390 nilfs_btree_node_get_nchildren(parent) - 1) {
1432 /* right sibling */ 1391 /* right sibling */
1433 sibptr = nilfs_btree_node_get_ptr(btree, parent, 1392 sibptr = nilfs_btree_node_get_ptr(btree, parent,
1434 pindex + 1); 1393 pindex + 1);
@@ -1436,8 +1395,8 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1436 if (ret < 0) 1395 if (ret < 0)
1437 goto err_out_curr_node; 1396 goto err_out_curr_node;
1438 sib = (struct nilfs_btree_node *)bh->b_data; 1397 sib = (struct nilfs_btree_node *)bh->b_data;
1439 if (nilfs_btree_node_get_nchildren(btree, sib) > 1398 if (nilfs_btree_node_get_nchildren(sib) >
1440 nilfs_btree_node_nchildren_min(btree, sib)) { 1399 nilfs_btree_node_nchildren_min(sib, btree)) {
1441 path[level].bp_sib_bh = bh; 1400 path[level].bp_sib_bh = bh;
1442 path[level].bp_op = nilfs_btree_borrow_right; 1401 path[level].bp_op = nilfs_btree_borrow_right;
1443 stats->bs_nblocks++; 1402 stats->bs_nblocks++;
@@ -1452,7 +1411,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1452 /* no siblings */ 1411 /* no siblings */
1453 /* the only child of the root node */ 1412 /* the only child of the root node */
1454 WARN_ON(level != nilfs_btree_height(btree) - 2); 1413 WARN_ON(level != nilfs_btree_height(btree) - 2);
1455 if (nilfs_btree_node_get_nchildren(btree, node) - 1 <= 1414 if (nilfs_btree_node_get_nchildren(node) - 1 <=
1456 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1415 NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1457 path[level].bp_op = nilfs_btree_shrink; 1416 path[level].bp_op = nilfs_btree_shrink;
1458 stats->bs_nblocks += 2; 1417 stats->bs_nblocks += 2;
@@ -1471,7 +1430,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1471 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); 1430 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1472 1431
1473 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, 1432 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1474 &path[level].bp_oldreq); 1433 &path[level].bp_oldreq, dat);
1475 if (ret < 0) 1434 if (ret < 0)
1476 goto err_out_child_node; 1435 goto err_out_child_node;
1477 1436
@@ -1486,12 +1445,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1486 1445
1487 /* error */ 1446 /* error */
1488 err_out_curr_node: 1447 err_out_curr_node:
1489 nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq); 1448 nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1490 err_out_child_node: 1449 err_out_child_node:
1491 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1450 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1492 brelse(path[level].bp_sib_bh); 1451 brelse(path[level].bp_sib_bh);
1493 nilfs_bmap_abort_end_ptr(&btree->bt_bmap, 1452 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1494 &path[level].bp_oldreq); 1453 &path[level].bp_oldreq, dat);
1495 } 1454 }
1496 *levelp = level; 1455 *levelp = level;
1497 stats->bs_nblocks = 0; 1456 stats->bs_nblocks = 0;
@@ -1500,13 +1459,13 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1500 1459
1501static void nilfs_btree_commit_delete(struct nilfs_btree *btree, 1460static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1502 struct nilfs_btree_path *path, 1461 struct nilfs_btree_path *path,
1503 int maxlevel) 1462 int maxlevel, struct inode *dat)
1504{ 1463{
1505 int level; 1464 int level;
1506 1465
1507 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1466 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1508 nilfs_bmap_commit_end_ptr(&btree->bt_bmap, 1467 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1509 &path[level].bp_oldreq); 1468 &path[level].bp_oldreq, dat);
1510 path[level].bp_op(btree, path, level, NULL, NULL); 1469 path[level].bp_op(btree, path, level, NULL, NULL);
1511 } 1470 }
1512 1471
@@ -1520,27 +1479,32 @@ static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1520 struct nilfs_btree *btree; 1479 struct nilfs_btree *btree;
1521 struct nilfs_btree_path *path; 1480 struct nilfs_btree_path *path;
1522 struct nilfs_bmap_stats stats; 1481 struct nilfs_bmap_stats stats;
1482 struct inode *dat;
1523 int level, ret; 1483 int level, ret;
1524 1484
1525 btree = (struct nilfs_btree *)bmap; 1485 btree = (struct nilfs_btree *)bmap;
1526 path = nilfs_btree_alloc_path(btree); 1486 path = nilfs_btree_alloc_path();
1527 if (path == NULL) 1487 if (path == NULL)
1528 return -ENOMEM; 1488 return -ENOMEM;
1529 nilfs_btree_init_path(btree, path); 1489 nilfs_btree_init_path(path);
1530 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1490 ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1531 NILFS_BTREE_LEVEL_NODE_MIN); 1491 NILFS_BTREE_LEVEL_NODE_MIN);
1532 if (ret < 0) 1492 if (ret < 0)
1533 goto out; 1493 goto out;
1534 1494
1535 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats); 1495
1496 dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1497 nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1498
1499 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1536 if (ret < 0) 1500 if (ret < 0)
1537 goto out; 1501 goto out;
1538 nilfs_btree_commit_delete(btree, path, level); 1502 nilfs_btree_commit_delete(btree, path, level, dat);
1539 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); 1503 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1540 1504
1541out: 1505out:
1542 nilfs_btree_clear_path(btree, path); 1506 nilfs_btree_release_path(path);
1543 nilfs_btree_free_path(btree, path); 1507 nilfs_btree_free_path(path);
1544 return ret; 1508 return ret;
1545} 1509}
1546 1510
@@ -1551,15 +1515,15 @@ static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1551 int ret; 1515 int ret;
1552 1516
1553 btree = (struct nilfs_btree *)bmap; 1517 btree = (struct nilfs_btree *)bmap;
1554 path = nilfs_btree_alloc_path(btree); 1518 path = nilfs_btree_alloc_path();
1555 if (path == NULL) 1519 if (path == NULL)
1556 return -ENOMEM; 1520 return -ENOMEM;
1557 nilfs_btree_init_path(btree, path); 1521 nilfs_btree_init_path(path);
1558 1522
1559 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1523 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1560 1524
1561 nilfs_btree_clear_path(btree, path); 1525 nilfs_btree_release_path(path);
1562 nilfs_btree_free_path(btree, path); 1526 nilfs_btree_free_path(path);
1563 1527
1564 return ret; 1528 return ret;
1565} 1529}
@@ -1581,7 +1545,7 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1581 node = root; 1545 node = root;
1582 break; 1546 break;
1583 case 3: 1547 case 3:
1584 nchildren = nilfs_btree_node_get_nchildren(btree, root); 1548 nchildren = nilfs_btree_node_get_nchildren(root);
1585 if (nchildren > 1) 1549 if (nchildren > 1)
1586 return 0; 1550 return 0;
1587 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); 1551 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
@@ -1594,10 +1558,10 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1594 return 0; 1558 return 0;
1595 } 1559 }
1596 1560
1597 nchildren = nilfs_btree_node_get_nchildren(btree, node); 1561 nchildren = nilfs_btree_node_get_nchildren(node);
1598 maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1); 1562 maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1599 nextmaxkey = (nchildren > 1) ? 1563 nextmaxkey = (nchildren > 1) ?
1600 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0; 1564 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1601 if (bh != NULL) 1565 if (bh != NULL)
1602 brelse(bh); 1566 brelse(bh);
1603 1567
@@ -1623,7 +1587,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1623 node = root; 1587 node = root;
1624 break; 1588 break;
1625 case 3: 1589 case 3:
1626 nchildren = nilfs_btree_node_get_nchildren(btree, root); 1590 nchildren = nilfs_btree_node_get_nchildren(root);
1627 WARN_ON(nchildren > 1); 1591 WARN_ON(nchildren > 1);
1628 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); 1592 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1629 ret = nilfs_btree_get_block(btree, ptr, &bh); 1593 ret = nilfs_btree_get_block(btree, ptr, &bh);
@@ -1636,11 +1600,11 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1636 return -EINVAL; 1600 return -EINVAL;
1637 } 1601 }
1638 1602
1639 nchildren = nilfs_btree_node_get_nchildren(btree, node); 1603 nchildren = nilfs_btree_node_get_nchildren(node);
1640 if (nchildren < nitems) 1604 if (nchildren < nitems)
1641 nitems = nchildren; 1605 nitems = nchildren;
1642 dkeys = nilfs_btree_node_dkeys(btree, node); 1606 dkeys = nilfs_btree_node_dkeys(node);
1643 dptrs = nilfs_btree_node_dptrs(btree, node); 1607 dptrs = nilfs_btree_node_dptrs(node, btree);
1644 for (i = 0; i < nitems; i++) { 1608 for (i = 0; i < nitems; i++) {
1645 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]); 1609 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1646 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]); 1610 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
@@ -1660,18 +1624,20 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1660 struct nilfs_bmap_stats *stats) 1624 struct nilfs_bmap_stats *stats)
1661{ 1625{
1662 struct buffer_head *bh; 1626 struct buffer_head *bh;
1663 struct nilfs_btree *btree; 1627 struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1628 struct inode *dat = NULL;
1664 int ret; 1629 int ret;
1665 1630
1666 btree = (struct nilfs_btree *)bmap;
1667 stats->bs_nblocks = 0; 1631 stats->bs_nblocks = 0;
1668 1632
1669 /* for data */ 1633 /* for data */
1670 /* cannot find near ptr */ 1634 /* cannot find near ptr */
1671 if (NILFS_BMAP_USE_VBN(bmap)) 1635 if (NILFS_BMAP_USE_VBN(bmap)) {
1672 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1636 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1637 dat = nilfs_bmap_get_dat(bmap);
1638 }
1673 1639
1674 ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq); 1640 ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1675 if (ret < 0) 1641 if (ret < 0)
1676 return ret; 1642 return ret;
1677 1643
@@ -1679,7 +1645,7 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1679 stats->bs_nblocks++; 1645 stats->bs_nblocks++;
1680 if (nreq != NULL) { 1646 if (nreq != NULL) {
1681 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1647 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1682 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq); 1648 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1683 if (ret < 0) 1649 if (ret < 0)
1684 goto err_out_dreq; 1650 goto err_out_dreq;
1685 1651
@@ -1696,9 +1662,9 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1696 1662
1697 /* error */ 1663 /* error */
1698 err_out_nreq: 1664 err_out_nreq:
1699 nilfs_bmap_abort_alloc_ptr(bmap, nreq); 1665 nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1700 err_out_dreq: 1666 err_out_dreq:
1701 nilfs_bmap_abort_alloc_ptr(bmap, dreq); 1667 nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1702 stats->bs_nblocks = 0; 1668 stats->bs_nblocks = 0;
1703 return ret; 1669 return ret;
1704 1670
@@ -1713,8 +1679,9 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1713 union nilfs_bmap_ptr_req *nreq, 1679 union nilfs_bmap_ptr_req *nreq,
1714 struct buffer_head *bh) 1680 struct buffer_head *bh)
1715{ 1681{
1716 struct nilfs_btree *btree; 1682 struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1717 struct nilfs_btree_node *node; 1683 struct nilfs_btree_node *node;
1684 struct inode *dat;
1718 __u64 tmpptr; 1685 __u64 tmpptr;
1719 1686
1720 /* free resources */ 1687 /* free resources */
@@ -1725,11 +1692,11 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1725 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1692 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1726 1693
1727 /* convert and insert */ 1694 /* convert and insert */
1728 btree = (struct nilfs_btree *)bmap; 1695 dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1729 nilfs_btree_init(bmap); 1696 nilfs_btree_init(bmap);
1730 if (nreq != NULL) { 1697 if (nreq != NULL) {
1731 nilfs_bmap_commit_alloc_ptr(bmap, dreq); 1698 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1732 nilfs_bmap_commit_alloc_ptr(bmap, nreq); 1699 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1733 1700
1734 /* create child node at level 1 */ 1701 /* create child node at level 1 */
1735 lock_buffer(bh); 1702 lock_buffer(bh);
@@ -1751,7 +1718,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1751 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, 1718 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1752 2, 1, &keys[0], &tmpptr); 1719 2, 1, &keys[0], &tmpptr);
1753 } else { 1720 } else {
1754 nilfs_bmap_commit_alloc_ptr(bmap, dreq); 1721 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1755 1722
1756 /* create root node at level 1 */ 1723 /* create root node at level 1 */
1757 node = nilfs_btree_get_root(btree); 1724 node = nilfs_btree_get_root(btree);
@@ -1822,7 +1789,7 @@ static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1822 1789
1823static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, 1790static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1824 struct nilfs_btree_path *path, 1791 struct nilfs_btree_path *path,
1825 int level) 1792 int level, struct inode *dat)
1826{ 1793{
1827 struct nilfs_btree_node *parent; 1794 struct nilfs_btree_node *parent;
1828 int ret; 1795 int ret;
@@ -1832,9 +1799,8 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1832 nilfs_btree_node_get_ptr(btree, parent, 1799 nilfs_btree_node_get_ptr(btree, parent,
1833 path[level + 1].bp_index); 1800 path[level + 1].bp_index);
1834 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1801 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1835 ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap, 1802 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1836 &path[level].bp_oldreq, 1803 &path[level].bp_newreq.bpr_req);
1837 &path[level].bp_newreq);
1838 if (ret < 0) 1804 if (ret < 0)
1839 return ret; 1805 return ret;
1840 1806
@@ -1846,9 +1812,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1846 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 1812 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1847 &path[level].bp_ctxt); 1813 &path[level].bp_ctxt);
1848 if (ret < 0) { 1814 if (ret < 0) {
1849 nilfs_bmap_abort_update_v(&btree->bt_bmap, 1815 nilfs_dat_abort_update(dat,
1850 &path[level].bp_oldreq, 1816 &path[level].bp_oldreq.bpr_req,
1851 &path[level].bp_newreq); 1817 &path[level].bp_newreq.bpr_req);
1852 return ret; 1818 return ret;
1853 } 1819 }
1854 } 1820 }
@@ -1858,13 +1824,13 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1858 1824
1859static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, 1825static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1860 struct nilfs_btree_path *path, 1826 struct nilfs_btree_path *path,
1861 int level) 1827 int level, struct inode *dat)
1862{ 1828{
1863 struct nilfs_btree_node *parent; 1829 struct nilfs_btree_node *parent;
1864 1830
1865 nilfs_bmap_commit_update_v(&btree->bt_bmap, 1831 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1866 &path[level].bp_oldreq, 1832 &path[level].bp_newreq.bpr_req,
1867 &path[level].bp_newreq); 1833 btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1868 1834
1869 if (buffer_nilfs_node(path[level].bp_bh)) { 1835 if (buffer_nilfs_node(path[level].bp_bh)) {
1870 nilfs_btnode_commit_change_key( 1836 nilfs_btnode_commit_change_key(
@@ -1881,11 +1847,10 @@ static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1881 1847
1882static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, 1848static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1883 struct nilfs_btree_path *path, 1849 struct nilfs_btree_path *path,
1884 int level) 1850 int level, struct inode *dat)
1885{ 1851{
1886 nilfs_bmap_abort_update_v(&btree->bt_bmap, 1852 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1887 &path[level].bp_oldreq, 1853 &path[level].bp_newreq.bpr_req);
1888 &path[level].bp_newreq);
1889 if (buffer_nilfs_node(path[level].bp_bh)) 1854 if (buffer_nilfs_node(path[level].bp_bh))
1890 nilfs_btnode_abort_change_key( 1855 nilfs_btnode_abort_change_key(
1891 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, 1856 &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
@@ -1894,14 +1859,14 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1894 1859
1895static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, 1860static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1896 struct nilfs_btree_path *path, 1861 struct nilfs_btree_path *path,
1897 int minlevel, 1862 int minlevel, int *maxlevelp,
1898 int *maxlevelp) 1863 struct inode *dat)
1899{ 1864{
1900 int level, ret; 1865 int level, ret;
1901 1866
1902 level = minlevel; 1867 level = minlevel;
1903 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1868 if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1904 ret = nilfs_btree_prepare_update_v(btree, path, level); 1869 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1905 if (ret < 0) 1870 if (ret < 0)
1906 return ret; 1871 return ret;
1907 } 1872 }
@@ -1909,7 +1874,7 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1909 !buffer_dirty(path[level].bp_bh)) { 1874 !buffer_dirty(path[level].bp_bh)) {
1910 1875
1911 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 1876 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1912 ret = nilfs_btree_prepare_update_v(btree, path, level); 1877 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1913 if (ret < 0) 1878 if (ret < 0)
1914 goto out; 1879 goto out;
1915 } 1880 }
@@ -1921,39 +1886,40 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1921 /* error */ 1886 /* error */
1922 out: 1887 out:
1923 while (--level > minlevel) 1888 while (--level > minlevel)
1924 nilfs_btree_abort_update_v(btree, path, level); 1889 nilfs_btree_abort_update_v(btree, path, level, dat);
1925 if (!buffer_nilfs_volatile(path[level].bp_bh)) 1890 if (!buffer_nilfs_volatile(path[level].bp_bh))
1926 nilfs_btree_abort_update_v(btree, path, level); 1891 nilfs_btree_abort_update_v(btree, path, level, dat);
1927 return ret; 1892 return ret;
1928} 1893}
1929 1894
1930static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, 1895static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1931 struct nilfs_btree_path *path, 1896 struct nilfs_btree_path *path,
1932 int minlevel, 1897 int minlevel, int maxlevel,
1933 int maxlevel, 1898 struct buffer_head *bh,
1934 struct buffer_head *bh) 1899 struct inode *dat)
1935{ 1900{
1936 int level; 1901 int level;
1937 1902
1938 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 1903 if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1939 nilfs_btree_commit_update_v(btree, path, minlevel); 1904 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1940 1905
1941 for (level = minlevel + 1; level <= maxlevel; level++) 1906 for (level = minlevel + 1; level <= maxlevel; level++)
1942 nilfs_btree_commit_update_v(btree, path, level); 1907 nilfs_btree_commit_update_v(btree, path, level, dat);
1943} 1908}
1944 1909
1945static int nilfs_btree_propagate_v(struct nilfs_btree *btree, 1910static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1946 struct nilfs_btree_path *path, 1911 struct nilfs_btree_path *path,
1947 int level, 1912 int level, struct buffer_head *bh)
1948 struct buffer_head *bh)
1949{ 1913{
1950 int maxlevel, ret; 1914 int maxlevel, ret;
1951 struct nilfs_btree_node *parent; 1915 struct nilfs_btree_node *parent;
1916 struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1952 __u64 ptr; 1917 __u64 ptr;
1953 1918
1954 get_bh(bh); 1919 get_bh(bh);
1955 path[level].bp_bh = bh; 1920 path[level].bp_bh = bh;
1956 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel); 1921 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1922 dat);
1957 if (ret < 0) 1923 if (ret < 0)
1958 goto out; 1924 goto out;
1959 1925
@@ -1961,12 +1927,12 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1961 parent = nilfs_btree_get_node(btree, path, level + 1); 1927 parent = nilfs_btree_get_node(btree, path, level + 1);
1962 ptr = nilfs_btree_node_get_ptr(btree, parent, 1928 ptr = nilfs_btree_node_get_ptr(btree, parent,
1963 path[level + 1].bp_index); 1929 path[level + 1].bp_index);
1964 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr); 1930 ret = nilfs_dat_mark_dirty(dat, ptr);
1965 if (ret < 0) 1931 if (ret < 0)
1966 goto out; 1932 goto out;
1967 } 1933 }
1968 1934
1969 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh); 1935 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1970 1936
1971 out: 1937 out:
1972 brelse(path[level].bp_bh); 1938 brelse(path[level].bp_bh);
@@ -1986,15 +1952,15 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1986 WARN_ON(!buffer_dirty(bh)); 1952 WARN_ON(!buffer_dirty(bh));
1987 1953
1988 btree = (struct nilfs_btree *)bmap; 1954 btree = (struct nilfs_btree *)bmap;
1989 path = nilfs_btree_alloc_path(btree); 1955 path = nilfs_btree_alloc_path();
1990 if (path == NULL) 1956 if (path == NULL)
1991 return -ENOMEM; 1957 return -ENOMEM;
1992 nilfs_btree_init_path(btree, path); 1958 nilfs_btree_init_path(path);
1993 1959
1994 if (buffer_nilfs_node(bh)) { 1960 if (buffer_nilfs_node(bh)) {
1995 node = (struct nilfs_btree_node *)bh->b_data; 1961 node = (struct nilfs_btree_node *)bh->b_data;
1996 key = nilfs_btree_node_get_key(btree, node, 0); 1962 key = nilfs_btree_node_get_key(node, 0);
1997 level = nilfs_btree_node_get_level(btree, node); 1963 level = nilfs_btree_node_get_level(node);
1998 } else { 1964 } else {
1999 key = nilfs_bmap_data_get_key(bmap, bh); 1965 key = nilfs_bmap_data_get_key(bmap, bh);
2000 level = NILFS_BTREE_LEVEL_DATA; 1966 level = NILFS_BTREE_LEVEL_DATA;
@@ -2013,8 +1979,8 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
2013 nilfs_btree_propagate_p(btree, path, level, bh); 1979 nilfs_btree_propagate_p(btree, path, level, bh);
2014 1980
2015 out: 1981 out:
2016 nilfs_btree_clear_path(btree, path); 1982 nilfs_btree_release_path(path);
2017 nilfs_btree_free_path(btree, path); 1983 nilfs_btree_free_path(path);
2018 1984
2019 return ret; 1985 return ret;
2020} 1986}
@@ -2022,7 +1988,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
2022static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap, 1988static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
2023 struct buffer_head *bh) 1989 struct buffer_head *bh)
2024{ 1990{
2025 return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr); 1991 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
2026} 1992}
2027 1993
2028static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, 1994static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
@@ -2037,12 +2003,12 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
2037 2003
2038 get_bh(bh); 2004 get_bh(bh);
2039 node = (struct nilfs_btree_node *)bh->b_data; 2005 node = (struct nilfs_btree_node *)bh->b_data;
2040 key = nilfs_btree_node_get_key(btree, node, 0); 2006 key = nilfs_btree_node_get_key(node, 0);
2041 level = nilfs_btree_node_get_level(btree, node); 2007 level = nilfs_btree_node_get_level(node);
2042 list_for_each(head, &lists[level]) { 2008 list_for_each(head, &lists[level]) {
2043 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2009 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2044 cnode = (struct nilfs_btree_node *)cbh->b_data; 2010 cnode = (struct nilfs_btree_node *)cbh->b_data;
2045 ckey = nilfs_btree_node_get_key(btree, cnode, 0); 2011 ckey = nilfs_btree_node_get_key(cnode, 0);
2046 if (key < ckey) 2012 if (key < ckey)
2047 break; 2013 break;
2048 } 2014 }
@@ -2120,8 +2086,7 @@ static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2120 nilfs_btree_node_set_ptr(btree, parent, 2086 nilfs_btree_node_set_ptr(btree, parent,
2121 path[level + 1].bp_index, blocknr); 2087 path[level + 1].bp_index, blocknr);
2122 2088
2123 key = nilfs_btree_node_get_key(btree, parent, 2089 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2124 path[level + 1].bp_index);
2125 /* on-disk format */ 2090 /* on-disk format */
2126 binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); 2091 binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2127 binfo->bi_dat.bi_level = level; 2092 binfo->bi_dat.bi_level = level;
@@ -2137,6 +2102,7 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2137 union nilfs_binfo *binfo) 2102 union nilfs_binfo *binfo)
2138{ 2103{
2139 struct nilfs_btree_node *parent; 2104 struct nilfs_btree_node *parent;
2105 struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2140 __u64 key; 2106 __u64 key;
2141 __u64 ptr; 2107 __u64 ptr;
2142 union nilfs_bmap_ptr_req req; 2108 union nilfs_bmap_ptr_req req;
@@ -2146,12 +2112,12 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2146 ptr = nilfs_btree_node_get_ptr(btree, parent, 2112 ptr = nilfs_btree_node_get_ptr(btree, parent,
2147 path[level + 1].bp_index); 2113 path[level + 1].bp_index);
2148 req.bpr_ptr = ptr; 2114 req.bpr_ptr = ptr;
2149 ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr); 2115 ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2150 if (unlikely(ret < 0)) 2116 if (ret < 0)
2151 return ret; 2117 return ret;
2118 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2152 2119
2153 key = nilfs_btree_node_get_key(btree, parent, 2120 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2154 path[level + 1].bp_index);
2155 /* on-disk format */ 2121 /* on-disk format */
2156 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); 2122 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2157 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); 2123 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
@@ -2171,15 +2137,15 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2171 int level, ret; 2137 int level, ret;
2172 2138
2173 btree = (struct nilfs_btree *)bmap; 2139 btree = (struct nilfs_btree *)bmap;
2174 path = nilfs_btree_alloc_path(btree); 2140 path = nilfs_btree_alloc_path();
2175 if (path == NULL) 2141 if (path == NULL)
2176 return -ENOMEM; 2142 return -ENOMEM;
2177 nilfs_btree_init_path(btree, path); 2143 nilfs_btree_init_path(path);
2178 2144
2179 if (buffer_nilfs_node(*bh)) { 2145 if (buffer_nilfs_node(*bh)) {
2180 node = (struct nilfs_btree_node *)(*bh)->b_data; 2146 node = (struct nilfs_btree_node *)(*bh)->b_data;
2181 key = nilfs_btree_node_get_key(btree, node, 0); 2147 key = nilfs_btree_node_get_key(node, 0);
2182 level = nilfs_btree_node_get_level(btree, node); 2148 level = nilfs_btree_node_get_level(node);
2183 } else { 2149 } else {
2184 key = nilfs_bmap_data_get_key(bmap, *bh); 2150 key = nilfs_bmap_data_get_key(bmap, *bh);
2185 level = NILFS_BTREE_LEVEL_DATA; 2151 level = NILFS_BTREE_LEVEL_DATA;
@@ -2196,8 +2162,8 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2196 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2162 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2197 2163
2198 out: 2164 out:
2199 nilfs_btree_clear_path(btree, path); 2165 nilfs_btree_release_path(path);
2200 nilfs_btree_free_path(btree, path); 2166 nilfs_btree_free_path(path);
2201 2167
2202 return ret; 2168 return ret;
2203} 2169}
@@ -2207,19 +2173,18 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2207 sector_t blocknr, 2173 sector_t blocknr,
2208 union nilfs_binfo *binfo) 2174 union nilfs_binfo *binfo)
2209{ 2175{
2210 struct nilfs_btree *btree;
2211 struct nilfs_btree_node *node; 2176 struct nilfs_btree_node *node;
2212 __u64 key; 2177 __u64 key;
2213 int ret; 2178 int ret;
2214 2179
2215 btree = (struct nilfs_btree *)bmap; 2180 ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2216 ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr); 2181 blocknr);
2217 if (ret < 0) 2182 if (ret < 0)
2218 return ret; 2183 return ret;
2219 2184
2220 if (buffer_nilfs_node(*bh)) { 2185 if (buffer_nilfs_node(*bh)) {
2221 node = (struct nilfs_btree_node *)(*bh)->b_data; 2186 node = (struct nilfs_btree_node *)(*bh)->b_data;
2222 key = nilfs_btree_node_get_key(btree, node, 0); 2187 key = nilfs_btree_node_get_key(node, 0);
2223 } else 2188 } else
2224 key = nilfs_bmap_data_get_key(bmap, *bh); 2189 key = nilfs_bmap_data_get_key(bmap, *bh);
2225 2190
@@ -2239,10 +2204,10 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2239 int ret; 2204 int ret;
2240 2205
2241 btree = (struct nilfs_btree *)bmap; 2206 btree = (struct nilfs_btree *)bmap;
2242 path = nilfs_btree_alloc_path(btree); 2207 path = nilfs_btree_alloc_path();
2243 if (path == NULL) 2208 if (path == NULL)
2244 return -ENOMEM; 2209 return -ENOMEM;
2245 nilfs_btree_init_path(btree, path); 2210 nilfs_btree_init_path(path);
2246 2211
2247 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); 2212 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2248 if (ret < 0) { 2213 if (ret < 0) {
@@ -2262,8 +2227,8 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2262 nilfs_bmap_set_dirty(&btree->bt_bmap); 2227 nilfs_bmap_set_dirty(&btree->bt_bmap);
2263 2228
2264 out: 2229 out:
2265 nilfs_btree_clear_path(btree, path); 2230 nilfs_btree_release_path(path);
2266 nilfs_btree_free_path(btree, path); 2231 nilfs_btree_free_path(path);
2267 return ret; 2232 return ret;
2268} 2233}
2269 2234
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index aec942cf79e3..1c6cfb59128d 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -815,8 +815,10 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
815 void *kaddr; 815 void *kaddr;
816 int ret; 816 int ret;
817 817
818 if (cno == 0) 818 /* CP number is invalid if it's zero or larger than the
819 return -ENOENT; /* checkpoint number 0 is invalid */ 819 largest exist one.*/
820 if (cno == 0 || cno >= nilfs_mdt_cno(cpfile))
821 return -ENOENT;
820 down_read(&NILFS_MDT(cpfile)->mi_sem); 822 down_read(&NILFS_MDT(cpfile)->mi_sem);
821 823
822 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); 824 ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh);
@@ -824,7 +826,10 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
824 goto out; 826 goto out;
825 kaddr = kmap_atomic(bh->b_page, KM_USER0); 827 kaddr = kmap_atomic(bh->b_page, KM_USER0);
826 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); 828 cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
827 ret = nilfs_checkpoint_snapshot(cp); 829 if (nilfs_checkpoint_invalid(cp))
830 ret = -ENOENT;
831 else
832 ret = nilfs_checkpoint_snapshot(cp);
828 kunmap_atomic(kaddr, KM_USER0); 833 kunmap_atomic(kaddr, KM_USER0);
829 brelse(bh); 834 brelse(bh);
830 835
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index 788a45950197..debea896e701 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -27,8 +27,6 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/nilfs2_fs.h> 28#include <linux/nilfs2_fs.h>
29 29
30#define NILFS_CPFILE_GFP NILFS_MDT_GFP
31
32 30
33int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int, 31int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int,
34 struct nilfs_checkpoint **, 32 struct nilfs_checkpoint **,
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 8927ca27e6f7..1ff8e15bd36b 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -109,12 +109,6 @@ void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req)
109 nilfs_palloc_commit_free_entry(dat, req); 109 nilfs_palloc_commit_free_entry(dat, req);
110} 110}
111 111
112void nilfs_dat_abort_free(struct inode *dat, struct nilfs_palloc_req *req)
113{
114 nilfs_dat_abort_entry(dat, req);
115 nilfs_palloc_abort_free_entry(dat, req);
116}
117
118int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req) 112int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
119{ 113{
120 int ret; 114 int ret;
@@ -140,11 +134,6 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
140 nilfs_dat_commit_entry(dat, req); 134 nilfs_dat_commit_entry(dat, req);
141} 135}
142 136
143void nilfs_dat_abort_start(struct inode *dat, struct nilfs_palloc_req *req)
144{
145 nilfs_dat_abort_entry(dat, req);
146}
147
148int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) 137int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
149{ 138{
150 struct nilfs_dat_entry *entry; 139 struct nilfs_dat_entry *entry;
@@ -222,6 +211,37 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req)
222 nilfs_dat_abort_entry(dat, req); 211 nilfs_dat_abort_entry(dat, req);
223} 212}
224 213
214int nilfs_dat_prepare_update(struct inode *dat,
215 struct nilfs_palloc_req *oldreq,
216 struct nilfs_palloc_req *newreq)
217{
218 int ret;
219
220 ret = nilfs_dat_prepare_end(dat, oldreq);
221 if (!ret) {
222 ret = nilfs_dat_prepare_alloc(dat, newreq);
223 if (ret < 0)
224 nilfs_dat_abort_end(dat, oldreq);
225 }
226 return ret;
227}
228
229void nilfs_dat_commit_update(struct inode *dat,
230 struct nilfs_palloc_req *oldreq,
231 struct nilfs_palloc_req *newreq, int dead)
232{
233 nilfs_dat_commit_end(dat, oldreq, dead);
234 nilfs_dat_commit_alloc(dat, newreq);
235}
236
237void nilfs_dat_abort_update(struct inode *dat,
238 struct nilfs_palloc_req *oldreq,
239 struct nilfs_palloc_req *newreq)
240{
241 nilfs_dat_abort_end(dat, oldreq);
242 nilfs_dat_abort_alloc(dat, newreq);
243}
244
225/** 245/**
226 * nilfs_dat_mark_dirty - 246 * nilfs_dat_mark_dirty -
227 * @dat: DAT file inode 247 * @dat: DAT file inode
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
index d328b81eead4..406070d3ff49 100644
--- a/fs/nilfs2/dat.h
+++ b/fs/nilfs2/dat.h
@@ -27,7 +27,6 @@
27#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29 29
30#define NILFS_DAT_GFP NILFS_MDT_GFP
31 30
32struct nilfs_palloc_req; 31struct nilfs_palloc_req;
33 32
@@ -39,10 +38,15 @@ void nilfs_dat_abort_alloc(struct inode *, struct nilfs_palloc_req *);
39int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *); 38int nilfs_dat_prepare_start(struct inode *, struct nilfs_palloc_req *);
40void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *, 39void nilfs_dat_commit_start(struct inode *, struct nilfs_palloc_req *,
41 sector_t); 40 sector_t);
42void nilfs_dat_abort_start(struct inode *, struct nilfs_palloc_req *);
43int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *); 41int nilfs_dat_prepare_end(struct inode *, struct nilfs_palloc_req *);
44void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int); 42void nilfs_dat_commit_end(struct inode *, struct nilfs_palloc_req *, int);
45void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *); 43void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *);
44int nilfs_dat_prepare_update(struct inode *, struct nilfs_palloc_req *,
45 struct nilfs_palloc_req *);
46void nilfs_dat_commit_update(struct inode *, struct nilfs_palloc_req *,
47 struct nilfs_palloc_req *, int);
48void nilfs_dat_abort_update(struct inode *, struct nilfs_palloc_req *,
49 struct nilfs_palloc_req *);
46 50
47int nilfs_dat_mark_dirty(struct inode *, __u64); 51int nilfs_dat_mark_dirty(struct inode *, __u64);
48int nilfs_dat_freev(struct inode *, __u64 *, size_t); 52int nilfs_dat_freev(struct inode *, __u64 *, size_t);
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 342d9765df8d..d369ac718277 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -125,106 +125,64 @@ static void nilfs_direct_set_target_v(struct nilfs_direct *direct,
125 direct->d_bmap.b_last_allocated_ptr = ptr; 125 direct->d_bmap.b_last_allocated_ptr = ptr;
126} 126}
127 127
128static int nilfs_direct_prepare_insert(struct nilfs_direct *direct,
129 __u64 key,
130 union nilfs_bmap_ptr_req *req,
131 struct nilfs_bmap_stats *stats)
132{
133 int ret;
134
135 if (NILFS_BMAP_USE_VBN(&direct->d_bmap))
136 req->bpr_ptr = nilfs_direct_find_target_v(direct, key);
137 ret = nilfs_bmap_prepare_alloc_ptr(&direct->d_bmap, req);
138 if (ret < 0)
139 return ret;
140
141 stats->bs_nblocks = 1;
142 return 0;
143}
144
145static void nilfs_direct_commit_insert(struct nilfs_direct *direct,
146 union nilfs_bmap_ptr_req *req,
147 __u64 key, __u64 ptr)
148{
149 struct buffer_head *bh;
150
151 /* ptr must be a pointer to a buffer head. */
152 bh = (struct buffer_head *)((unsigned long)ptr);
153 set_buffer_nilfs_volatile(bh);
154
155 nilfs_bmap_commit_alloc_ptr(&direct->d_bmap, req);
156 nilfs_direct_set_ptr(direct, key, req->bpr_ptr);
157
158 if (!nilfs_bmap_dirty(&direct->d_bmap))
159 nilfs_bmap_set_dirty(&direct->d_bmap);
160
161 if (NILFS_BMAP_USE_VBN(&direct->d_bmap))
162 nilfs_direct_set_target_v(direct, key, req->bpr_ptr);
163}
164
165static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) 128static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
166{ 129{
167 struct nilfs_direct *direct; 130 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
168 union nilfs_bmap_ptr_req req; 131 union nilfs_bmap_ptr_req req;
169 struct nilfs_bmap_stats stats; 132 struct inode *dat = NULL;
133 struct buffer_head *bh;
170 int ret; 134 int ret;
171 135
172 direct = (struct nilfs_direct *)bmap;
173 if (key > NILFS_DIRECT_KEY_MAX) 136 if (key > NILFS_DIRECT_KEY_MAX)
174 return -ENOENT; 137 return -ENOENT;
175 if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) 138 if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR)
176 return -EEXIST; 139 return -EEXIST;
177 140
178 ret = nilfs_direct_prepare_insert(direct, key, &req, &stats); 141 if (NILFS_BMAP_USE_VBN(bmap)) {
179 if (ret < 0) 142 req.bpr_ptr = nilfs_direct_find_target_v(direct, key);
180 return ret; 143 dat = nilfs_bmap_get_dat(bmap);
181 nilfs_direct_commit_insert(direct, &req, key, ptr); 144 }
182 nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); 145 ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat);
146 if (!ret) {
147 /* ptr must be a pointer to a buffer head. */
148 bh = (struct buffer_head *)((unsigned long)ptr);
149 set_buffer_nilfs_volatile(bh);
183 150
184 return 0; 151 nilfs_bmap_commit_alloc_ptr(bmap, &req, dat);
185} 152 nilfs_direct_set_ptr(direct, key, req.bpr_ptr);
186 153
187static int nilfs_direct_prepare_delete(struct nilfs_direct *direct, 154 if (!nilfs_bmap_dirty(bmap))
188 union nilfs_bmap_ptr_req *req, 155 nilfs_bmap_set_dirty(bmap);
189 __u64 key,
190 struct nilfs_bmap_stats *stats)
191{
192 int ret;
193 156
194 req->bpr_ptr = nilfs_direct_get_ptr(direct, key); 157 if (NILFS_BMAP_USE_VBN(bmap))
195 ret = nilfs_bmap_prepare_end_ptr(&direct->d_bmap, req); 158 nilfs_direct_set_target_v(direct, key, req.bpr_ptr);
196 if (!ret)
197 stats->bs_nblocks = 1;
198 return ret;
199}
200 159
201static void nilfs_direct_commit_delete(struct nilfs_direct *direct, 160 nilfs_bmap_add_blocks(bmap, 1);
202 union nilfs_bmap_ptr_req *req, 161 }
203 __u64 key) 162 return ret;
204{
205 nilfs_bmap_commit_end_ptr(&direct->d_bmap, req);
206 nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
207} 163}
208 164
209static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) 165static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key)
210{ 166{
211 struct nilfs_direct *direct; 167 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
212 union nilfs_bmap_ptr_req req; 168 union nilfs_bmap_ptr_req req;
213 struct nilfs_bmap_stats stats; 169 struct inode *dat;
214 int ret; 170 int ret;
215 171
216 direct = (struct nilfs_direct *)bmap; 172 if (key > NILFS_DIRECT_KEY_MAX ||
217 if ((key > NILFS_DIRECT_KEY_MAX) ||
218 nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) 173 nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR)
219 return -ENOENT; 174 return -ENOENT;
220 175
221 ret = nilfs_direct_prepare_delete(direct, &req, key, &stats); 176 dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
222 if (ret < 0) 177 req.bpr_ptr = nilfs_direct_get_ptr(direct, key);
223 return ret;
224 nilfs_direct_commit_delete(direct, &req, key);
225 nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
226 178
227 return 0; 179 ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat);
180 if (!ret) {
181 nilfs_bmap_commit_end_ptr(bmap, &req, dat);
182 nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
183 nilfs_bmap_sub_blocks(bmap, 1);
184 }
185 return ret;
228} 186}
229 187
230static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) 188static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
@@ -310,59 +268,56 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
310 return 0; 268 return 0;
311} 269}
312 270
313static int nilfs_direct_propagate_v(struct nilfs_direct *direct, 271static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
314 struct buffer_head *bh) 272 struct buffer_head *bh)
315{ 273{
316 union nilfs_bmap_ptr_req oldreq, newreq; 274 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
275 struct nilfs_palloc_req oldreq, newreq;
276 struct inode *dat;
317 __u64 key; 277 __u64 key;
318 __u64 ptr; 278 __u64 ptr;
319 int ret; 279 int ret;
320 280
321 key = nilfs_bmap_data_get_key(&direct->d_bmap, bh); 281 if (!NILFS_BMAP_USE_VBN(bmap))
282 return 0;
283
284 dat = nilfs_bmap_get_dat(bmap);
285 key = nilfs_bmap_data_get_key(bmap, bh);
322 ptr = nilfs_direct_get_ptr(direct, key); 286 ptr = nilfs_direct_get_ptr(direct, key);
323 if (!buffer_nilfs_volatile(bh)) { 287 if (!buffer_nilfs_volatile(bh)) {
324 oldreq.bpr_ptr = ptr; 288 oldreq.pr_entry_nr = ptr;
325 newreq.bpr_ptr = ptr; 289 newreq.pr_entry_nr = ptr;
326 ret = nilfs_bmap_prepare_update_v(&direct->d_bmap, &oldreq, 290 ret = nilfs_dat_prepare_update(dat, &oldreq, &newreq);
327 &newreq);
328 if (ret < 0) 291 if (ret < 0)
329 return ret; 292 return ret;
330 nilfs_bmap_commit_update_v(&direct->d_bmap, &oldreq, &newreq); 293 nilfs_dat_commit_update(dat, &oldreq, &newreq,
294 bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
331 set_buffer_nilfs_volatile(bh); 295 set_buffer_nilfs_volatile(bh);
332 nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr); 296 nilfs_direct_set_ptr(direct, key, newreq.pr_entry_nr);
333 } else 297 } else
334 ret = nilfs_bmap_mark_dirty(&direct->d_bmap, ptr); 298 ret = nilfs_dat_mark_dirty(dat, ptr);
335 299
336 return ret; 300 return ret;
337} 301}
338 302
339static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
340 struct buffer_head *bh)
341{
342 struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
343
344 return NILFS_BMAP_USE_VBN(bmap) ?
345 nilfs_direct_propagate_v(direct, bh) : 0;
346}
347
348static int nilfs_direct_assign_v(struct nilfs_direct *direct, 303static int nilfs_direct_assign_v(struct nilfs_direct *direct,
349 __u64 key, __u64 ptr, 304 __u64 key, __u64 ptr,
350 struct buffer_head **bh, 305 struct buffer_head **bh,
351 sector_t blocknr, 306 sector_t blocknr,
352 union nilfs_binfo *binfo) 307 union nilfs_binfo *binfo)
353{ 308{
309 struct inode *dat = nilfs_bmap_get_dat(&direct->d_bmap);
354 union nilfs_bmap_ptr_req req; 310 union nilfs_bmap_ptr_req req;
355 int ret; 311 int ret;
356 312
357 req.bpr_ptr = ptr; 313 req.bpr_ptr = ptr;
358 ret = nilfs_bmap_start_v(&direct->d_bmap, &req, blocknr); 314 ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
359 if (unlikely(ret < 0)) 315 if (!ret) {
360 return ret; 316 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
361 317 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
362 binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); 318 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
363 binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); 319 }
364 320 return ret;
365 return 0;
366} 321}
367 322
368static int nilfs_direct_assign_p(struct nilfs_direct *direct, 323static int nilfs_direct_assign_p(struct nilfs_direct *direct,
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index 5d30a35679b5..ecc3ba76db47 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -31,7 +31,6 @@
31#include "mdt.h" 31#include "mdt.h"
32#include "alloc.h" 32#include "alloc.h"
33 33
34#define NILFS_IFILE_GFP NILFS_MDT_GFP
35 34
36static inline struct nilfs_inode * 35static inline struct nilfs_inode *
37nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh) 36nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh)
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index fe9d8f2a13f8..807e584b163d 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -430,7 +430,8 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino,
430 430
431 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); 431 raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh);
432 432
433 if (nilfs_read_inode_common(inode, raw_inode)) 433 err = nilfs_read_inode_common(inode, raw_inode);
434 if (err)
434 goto failed_unmap; 435 goto failed_unmap;
435 436
436 if (S_ISREG(inode->i_mode)) { 437 if (S_ISREG(inode->i_mode)) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 6ea5f872e2de..6572ea4bc4df 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -442,12 +442,6 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
442 const char *msg; 442 const char *msg;
443 int ret; 443 int ret;
444 444
445 ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]);
446 if (ret < 0) {
447 msg = "cannot read source blocks";
448 goto failed;
449 }
450
451 ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]); 445 ret = nilfs_ioctl_delete_checkpoints(nilfs, &argv[1], kbufs[1]);
452 if (ret < 0) { 446 if (ret < 0) {
453 /* 447 /*
@@ -548,7 +542,25 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
548 } 542 }
549 } 543 }
550 544
551 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); 545 /*
546 * nilfs_ioctl_move_blocks() will call nilfs_gc_iget(),
547 * which will operates an inode list without blocking.
548 * To protect the list from concurrent operations,
549 * nilfs_ioctl_move_blocks should be atomic operation.
550 */
551 if (test_and_set_bit(THE_NILFS_GC_RUNNING, &nilfs->ns_flags)) {
552 ret = -EBUSY;
553 goto out_free;
554 }
555
556 ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]);
557 if (ret < 0)
558 printk(KERN_ERR "NILFS: GC failed during preparation: "
559 "cannot read source blocks: err=%d\n", ret);
560 else
561 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
562
563 clear_nilfs_gc_running(nilfs);
552 564
553 out_free: 565 out_free:
554 while (--n >= 0) 566 while (--n >= 0)
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 2dfd47714ae5..156bf6091a96 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -103,15 +103,12 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
103 goto failed_unlock; 103 goto failed_unlock;
104 104
105 err = -EEXIST; 105 err = -EEXIST;
106 if (buffer_uptodate(bh) || buffer_mapped(bh)) 106 if (buffer_uptodate(bh))
107 goto failed_bh; 107 goto failed_bh;
108#if 0 108
109 /* The uptodate flag is not protected by the page lock, but
110 the mapped flag is. Thus, we don't have to wait the buffer. */
111 wait_on_buffer(bh); 109 wait_on_buffer(bh);
112 if (buffer_uptodate(bh)) 110 if (buffer_uptodate(bh))
113 goto failed_bh; 111 goto failed_bh;
114#endif
115 112
116 bh->b_bdev = nilfs->ns_bdev; 113 bh->b_bdev = nilfs->ns_bdev;
117 err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); 114 err = nilfs_mdt_insert_new_block(inode, block, bh, init_block);
@@ -139,7 +136,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
139 int mode, struct buffer_head **out_bh) 136 int mode, struct buffer_head **out_bh)
140{ 137{
141 struct buffer_head *bh; 138 struct buffer_head *bh;
142 unsigned long blknum = 0; 139 __u64 blknum = 0;
143 int ret = -ENOMEM; 140 int ret = -ENOMEM;
144 141
145 bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); 142 bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0);
@@ -162,17 +159,15 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
162 unlock_buffer(bh); 159 unlock_buffer(bh);
163 goto out; 160 goto out;
164 } 161 }
165 if (!buffer_mapped(bh)) { /* unused buffer */ 162
166 ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, 163 ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum);
167 &blknum); 164 if (unlikely(ret)) {
168 if (unlikely(ret)) { 165 unlock_buffer(bh);
169 unlock_buffer(bh); 166 goto failed_bh;
170 goto failed_bh;
171 }
172 bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev;
173 bh->b_blocknr = blknum;
174 set_buffer_mapped(bh);
175 } 167 }
168 bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev;
169 bh->b_blocknr = (sector_t)blknum;
170 set_buffer_mapped(bh);
176 171
177 bh->b_end_io = end_buffer_read_sync; 172 bh->b_end_io = end_buffer_read_sync;
178 get_bh(bh); 173 get_bh(bh);
@@ -402,6 +397,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
402 struct inode *inode = container_of(page->mapping, 397 struct inode *inode = container_of(page->mapping,
403 struct inode, i_data); 398 struct inode, i_data);
404 struct super_block *sb = inode->i_sb; 399 struct super_block *sb = inode->i_sb;
400 struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs;
405 struct nilfs_sb_info *writer = NULL; 401 struct nilfs_sb_info *writer = NULL;
406 int err = 0; 402 int err = 0;
407 403
@@ -411,9 +407,10 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
411 if (page->mapping->assoc_mapping) 407 if (page->mapping->assoc_mapping)
412 return 0; /* Do not request flush for shadow page cache */ 408 return 0; /* Do not request flush for shadow page cache */
413 if (!sb) { 409 if (!sb) {
414 writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs); 410 down_read(&nilfs->ns_writer_sem);
411 writer = nilfs->ns_writer;
415 if (!writer) { 412 if (!writer) {
416 nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs); 413 up_read(&nilfs->ns_writer_sem);
417 return -EROFS; 414 return -EROFS;
418 } 415 }
419 sb = writer->s_super; 416 sb = writer->s_super;
@@ -425,7 +422,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
425 nilfs_flush_segment(sb, inode->i_ino); 422 nilfs_flush_segment(sb, inode->i_ino);
426 423
427 if (writer) 424 if (writer)
428 nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs); 425 up_read(&nilfs->ns_writer_sem);
429 return err; 426 return err;
430} 427}
431 428
@@ -516,9 +513,10 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
516} 513}
517 514
518struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, 515struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb,
519 ino_t ino, gfp_t gfp_mask) 516 ino_t ino)
520{ 517{
521 struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask); 518 struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino,
519 NILFS_MDT_GFP);
522 520
523 if (!inode) 521 if (!inode)
524 return NULL; 522 return NULL;
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index df683e0bca6a..431599733c9b 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -74,8 +74,7 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long);
74int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); 74int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long);
75int nilfs_mdt_fetch_dirty(struct inode *); 75int nilfs_mdt_fetch_dirty(struct inode *);
76 76
77struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, 77struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t);
78 gfp_t);
79struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, 78struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *,
80 ino_t, gfp_t); 79 ino_t, gfp_t);
81void nilfs_mdt_destroy(struct inode *); 80void nilfs_mdt_destroy(struct inode *);
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index d80cc71be749..6dc83591d118 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -552,7 +552,8 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi,
552 printk(KERN_WARNING 552 printk(KERN_WARNING
553 "NILFS warning: error recovering data block " 553 "NILFS warning: error recovering data block "
554 "(err=%d, ino=%lu, block-offset=%llu)\n", 554 "(err=%d, ino=%lu, block-offset=%llu)\n",
555 err, rb->ino, (unsigned long long)rb->blkoff); 555 err, (unsigned long)rb->ino,
556 (unsigned long long)rb->blkoff);
556 if (!err2) 557 if (!err2)
557 err2 = err; 558 err2 = err;
558 next: 559 next:
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 9e3fe17bb96b..e6d9e37fa241 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -316,10 +316,10 @@ static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start,
316{ 316{
317 struct bio *bio; 317 struct bio *bio;
318 318
319 bio = bio_alloc(GFP_NOWAIT, nr_vecs); 319 bio = bio_alloc(GFP_NOIO, nr_vecs);
320 if (bio == NULL) { 320 if (bio == NULL) {
321 while (!bio && (nr_vecs >>= 1)) 321 while (!bio && (nr_vecs >>= 1))
322 bio = bio_alloc(GFP_NOWAIT, nr_vecs); 322 bio = bio_alloc(GFP_NOIO, nr_vecs);
323 } 323 }
324 if (likely(bio)) { 324 if (likely(bio)) {
325 bio->bi_bdev = sb->s_bdev; 325 bio->bi_bdev = sb->s_bdev;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 51ff3d0a4ee2..683df89dbae5 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2501,7 +2501,8 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci,
2501 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && 2501 if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
2502 nilfs_discontinued(nilfs)) { 2502 nilfs_discontinued(nilfs)) {
2503 down_write(&nilfs->ns_sem); 2503 down_write(&nilfs->ns_sem);
2504 req->sb_err = nilfs_commit_super(sbi, 0); 2504 req->sb_err = nilfs_commit_super(sbi,
2505 nilfs_altsb_need_update(nilfs));
2505 up_write(&nilfs->ns_sem); 2506 up_write(&nilfs->ns_sem);
2506 } 2507 }
2507 } 2508 }
@@ -2689,6 +2690,7 @@ static int nilfs_segctor_thread(void *arg)
2689 } else { 2690 } else {
2690 DEFINE_WAIT(wait); 2691 DEFINE_WAIT(wait);
2691 int should_sleep = 1; 2692 int should_sleep = 1;
2693 struct the_nilfs *nilfs;
2692 2694
2693 prepare_to_wait(&sci->sc_wait_daemon, &wait, 2695 prepare_to_wait(&sci->sc_wait_daemon, &wait,
2694 TASK_INTERRUPTIBLE); 2696 TASK_INTERRUPTIBLE);
@@ -2709,6 +2711,9 @@ static int nilfs_segctor_thread(void *arg)
2709 finish_wait(&sci->sc_wait_daemon, &wait); 2711 finish_wait(&sci->sc_wait_daemon, &wait);
2710 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && 2712 timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
2711 time_after_eq(jiffies, sci->sc_timer->expires)); 2713 time_after_eq(jiffies, sci->sc_timer->expires));
2714 nilfs = sci->sc_sbi->s_nilfs;
2715 if (sci->sc_super->s_dirt && nilfs_sb_need_update(nilfs))
2716 set_nilfs_discontinued(nilfs);
2712 } 2717 }
2713 goto loop; 2718 goto loop;
2714 2719
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a2c4d76c3366..0e99e5c0bd0f 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -28,7 +28,6 @@
28#include <linux/nilfs2_fs.h> 28#include <linux/nilfs2_fs.h>
29#include "mdt.h" 29#include "mdt.h"
30 30
31#define NILFS_SUFILE_GFP NILFS_MDT_GFP
32 31
33static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) 32static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile)
34{ 33{
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 151964f0de4c..55f3d6b60732 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -50,6 +50,8 @@
50#include <linux/writeback.h> 50#include <linux/writeback.h>
51#include <linux/kobject.h> 51#include <linux/kobject.h>
52#include <linux/exportfs.h> 52#include <linux/exportfs.h>
53#include <linux/seq_file.h>
54#include <linux/mount.h>
53#include "nilfs.h" 55#include "nilfs.h"
54#include "mdt.h" 56#include "mdt.h"
55#include "alloc.h" 57#include "alloc.h"
@@ -65,7 +67,6 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
65 "(NILFS)"); 67 "(NILFS)");
66MODULE_LICENSE("GPL"); 68MODULE_LICENSE("GPL");
67 69
68static void nilfs_write_super(struct super_block *sb);
69static int nilfs_remount(struct super_block *sb, int *flags, char *data); 70static int nilfs_remount(struct super_block *sb, int *flags, char *data);
70 71
71/** 72/**
@@ -311,9 +312,6 @@ static void nilfs_put_super(struct super_block *sb)
311 312
312 lock_kernel(); 313 lock_kernel();
313 314
314 if (sb->s_dirt)
315 nilfs_write_super(sb);
316
317 nilfs_detach_segment_constructor(sbi); 315 nilfs_detach_segment_constructor(sbi);
318 316
319 if (!(sb->s_flags & MS_RDONLY)) { 317 if (!(sb->s_flags & MS_RDONLY)) {
@@ -336,63 +334,21 @@ static void nilfs_put_super(struct super_block *sb)
336 unlock_kernel(); 334 unlock_kernel();
337} 335}
338 336
339/** 337static int nilfs_sync_fs(struct super_block *sb, int wait)
340 * nilfs_write_super - write super block(s) of NILFS
341 * @sb: super_block
342 *
343 * nilfs_write_super() gets a fs-dependent lock, writes super block(s), and
344 * clears s_dirt. This function is called in the section protected by
345 * lock_super().
346 *
347 * The s_dirt flag is managed by each filesystem and we protect it by ns_sem
348 * of the struct the_nilfs. Lock order must be as follows:
349 *
350 * 1. lock_super()
351 * 2. down_write(&nilfs->ns_sem)
352 *
353 * Inside NILFS, locking ns_sem is enough to protect s_dirt and the buffer
354 * of the super block (nilfs->ns_sbp[]).
355 *
356 * In most cases, VFS functions call lock_super() before calling these
357 * methods. So we must be careful not to bring on deadlocks when using
358 * lock_super(); see generic_shutdown_super(), write_super(), and so on.
359 *
360 * Note that order of lock_kernel() and lock_super() depends on contexts
361 * of VFS. We should also note that lock_kernel() can be used in its
362 * protective section and only the outermost one has an effect.
363 */
364static void nilfs_write_super(struct super_block *sb)
365{ 338{
366 struct nilfs_sb_info *sbi = NILFS_SB(sb); 339 struct nilfs_sb_info *sbi = NILFS_SB(sb);
367 struct the_nilfs *nilfs = sbi->s_nilfs; 340 struct the_nilfs *nilfs = sbi->s_nilfs;
368
369 down_write(&nilfs->ns_sem);
370 if (!(sb->s_flags & MS_RDONLY)) {
371 struct nilfs_super_block **sbp = nilfs->ns_sbp;
372 u64 t = get_seconds();
373 int dupsb;
374
375 if (!nilfs_discontinued(nilfs) && t >= nilfs->ns_sbwtime[0] &&
376 t < nilfs->ns_sbwtime[0] + NILFS_SB_FREQ) {
377 up_write(&nilfs->ns_sem);
378 return;
379 }
380 dupsb = sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ;
381 nilfs_commit_super(sbi, dupsb);
382 }
383 sb->s_dirt = 0;
384 up_write(&nilfs->ns_sem);
385}
386
387static int nilfs_sync_fs(struct super_block *sb, int wait)
388{
389 int err = 0; 341 int err = 0;
390 342
391 nilfs_write_super(sb);
392
393 /* This function is called when super block should be written back */ 343 /* This function is called when super block should be written back */
394 if (wait) 344 if (wait)
395 err = nilfs_construct_segment(sb); 345 err = nilfs_construct_segment(sb);
346
347 down_write(&nilfs->ns_sem);
348 if (sb->s_dirt)
349 nilfs_commit_super(sbi, 1);
350 up_write(&nilfs->ns_sem);
351
396 return err; 352 return err;
397} 353}
398 354
@@ -407,8 +363,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
407 list_add(&sbi->s_list, &nilfs->ns_supers); 363 list_add(&sbi->s_list, &nilfs->ns_supers);
408 up_write(&nilfs->ns_super_sem); 364 up_write(&nilfs->ns_super_sem);
409 365
410 sbi->s_ifile = nilfs_mdt_new( 366 sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO);
411 nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP);
412 if (!sbi->s_ifile) 367 if (!sbi->s_ifile)
413 return -ENOMEM; 368 return -ENOMEM;
414 369
@@ -529,6 +484,26 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf)
529 return 0; 484 return 0;
530} 485}
531 486
487static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
488{
489 struct super_block *sb = vfs->mnt_sb;
490 struct nilfs_sb_info *sbi = NILFS_SB(sb);
491
492 if (!nilfs_test_opt(sbi, BARRIER))
493 seq_printf(seq, ",barrier=off");
494 if (nilfs_test_opt(sbi, SNAPSHOT))
495 seq_printf(seq, ",cp=%llu",
496 (unsigned long long int)sbi->s_snapshot_cno);
497 if (nilfs_test_opt(sbi, ERRORS_RO))
498 seq_printf(seq, ",errors=remount-ro");
499 if (nilfs_test_opt(sbi, ERRORS_PANIC))
500 seq_printf(seq, ",errors=panic");
501 if (nilfs_test_opt(sbi, STRICT_ORDER))
502 seq_printf(seq, ",order=strict");
503
504 return 0;
505}
506
532static struct super_operations nilfs_sops = { 507static struct super_operations nilfs_sops = {
533 .alloc_inode = nilfs_alloc_inode, 508 .alloc_inode = nilfs_alloc_inode,
534 .destroy_inode = nilfs_destroy_inode, 509 .destroy_inode = nilfs_destroy_inode,
@@ -538,7 +513,7 @@ static struct super_operations nilfs_sops = {
538 /* .drop_inode = nilfs_drop_inode, */ 513 /* .drop_inode = nilfs_drop_inode, */
539 .delete_inode = nilfs_delete_inode, 514 .delete_inode = nilfs_delete_inode,
540 .put_super = nilfs_put_super, 515 .put_super = nilfs_put_super,
541 .write_super = nilfs_write_super, 516 /* .write_super = nilfs_write_super, */
542 .sync_fs = nilfs_sync_fs, 517 .sync_fs = nilfs_sync_fs,
543 /* .write_super_lockfs */ 518 /* .write_super_lockfs */
544 /* .unlockfs */ 519 /* .unlockfs */
@@ -546,7 +521,7 @@ static struct super_operations nilfs_sops = {
546 .remount_fs = nilfs_remount, 521 .remount_fs = nilfs_remount,
547 .clear_inode = nilfs_clear_inode, 522 .clear_inode = nilfs_clear_inode,
548 /* .umount_begin */ 523 /* .umount_begin */
549 /* .show_options */ 524 .show_options = nilfs_show_options
550}; 525};
551 526
552static struct inode * 527static struct inode *
@@ -816,10 +791,15 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
816 791
817 if (sb->s_flags & MS_RDONLY) { 792 if (sb->s_flags & MS_RDONLY) {
818 if (nilfs_test_opt(sbi, SNAPSHOT)) { 793 if (nilfs_test_opt(sbi, SNAPSHOT)) {
794 down_read(&nilfs->ns_segctor_sem);
819 err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, 795 err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile,
820 sbi->s_snapshot_cno); 796 sbi->s_snapshot_cno);
821 if (err < 0) 797 up_read(&nilfs->ns_segctor_sem);
798 if (err < 0) {
799 if (err == -ENOENT)
800 err = -EINVAL;
822 goto failed_sbi; 801 goto failed_sbi;
802 }
823 if (!err) { 803 if (!err) {
824 printk(KERN_ERR 804 printk(KERN_ERR
825 "NILFS: The specified checkpoint is " 805 "NILFS: The specified checkpoint is "
@@ -1127,10 +1107,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
1127 */ 1107 */
1128 sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno); 1108 sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno);
1129 1109
1130 if (!sd.cno)
1131 /* trying to get the latest checkpoint. */
1132 sd.cno = nilfs_last_cno(nilfs);
1133
1134 /* 1110 /*
1135 * Get super block instance holding the nilfs_sb_info struct. 1111 * Get super block instance holding the nilfs_sb_info struct.
1136 * A new instance is allocated if no existing mount is present or 1112 * A new instance is allocated if no existing mount is present or
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 8b8889825716..d4168e269c5d 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -68,12 +68,11 @@ static struct the_nilfs *alloc_nilfs(struct block_device *bdev)
68 68
69 nilfs->ns_bdev = bdev; 69 nilfs->ns_bdev = bdev;
70 atomic_set(&nilfs->ns_count, 1); 70 atomic_set(&nilfs->ns_count, 1);
71 atomic_set(&nilfs->ns_writer_refcount, -1);
72 atomic_set(&nilfs->ns_ndirtyblks, 0); 71 atomic_set(&nilfs->ns_ndirtyblks, 0);
73 init_rwsem(&nilfs->ns_sem); 72 init_rwsem(&nilfs->ns_sem);
74 init_rwsem(&nilfs->ns_super_sem); 73 init_rwsem(&nilfs->ns_super_sem);
75 mutex_init(&nilfs->ns_mount_mutex); 74 mutex_init(&nilfs->ns_mount_mutex);
76 mutex_init(&nilfs->ns_writer_mutex); 75 init_rwsem(&nilfs->ns_writer_sem);
77 INIT_LIST_HEAD(&nilfs->ns_list); 76 INIT_LIST_HEAD(&nilfs->ns_list);
78 INIT_LIST_HEAD(&nilfs->ns_supers); 77 INIT_LIST_HEAD(&nilfs->ns_supers);
79 spin_lock_init(&nilfs->ns_last_segment_lock); 78 spin_lock_init(&nilfs->ns_last_segment_lock);
@@ -188,23 +187,19 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs,
188 inode_size = nilfs->ns_inode_size; 187 inode_size = nilfs->ns_inode_size;
189 188
190 err = -ENOMEM; 189 err = -ENOMEM;
191 nilfs->ns_dat = nilfs_mdt_new( 190 nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO);
192 nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
193 if (unlikely(!nilfs->ns_dat)) 191 if (unlikely(!nilfs->ns_dat))
194 goto failed; 192 goto failed;
195 193
196 nilfs->ns_gc_dat = nilfs_mdt_new( 194 nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO);
197 nilfs, NULL, NILFS_DAT_INO, NILFS_DAT_GFP);
198 if (unlikely(!nilfs->ns_gc_dat)) 195 if (unlikely(!nilfs->ns_gc_dat))
199 goto failed_dat; 196 goto failed_dat;
200 197
201 nilfs->ns_cpfile = nilfs_mdt_new( 198 nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO);
202 nilfs, NULL, NILFS_CPFILE_INO, NILFS_CPFILE_GFP);
203 if (unlikely(!nilfs->ns_cpfile)) 199 if (unlikely(!nilfs->ns_cpfile))
204 goto failed_gc_dat; 200 goto failed_gc_dat;
205 201
206 nilfs->ns_sufile = nilfs_mdt_new( 202 nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO);
207 nilfs, NULL, NILFS_SUFILE_INO, NILFS_SUFILE_GFP);
208 if (unlikely(!nilfs->ns_sufile)) 203 if (unlikely(!nilfs->ns_sufile))
209 goto failed_cpfile; 204 goto failed_cpfile;
210 205
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 1b9caafb8662..20abd55881e0 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -37,6 +37,7 @@ enum {
37 THE_NILFS_LOADED, /* Roll-back/roll-forward has done and 37 THE_NILFS_LOADED, /* Roll-back/roll-forward has done and
38 the latest checkpoint was loaded */ 38 the latest checkpoint was loaded */
39 THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ 39 THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
40 THE_NILFS_GC_RUNNING, /* gc process is running */
40}; 41};
41 42
42/** 43/**
@@ -50,8 +51,7 @@ enum {
50 * @ns_sem: semaphore for shared states 51 * @ns_sem: semaphore for shared states
51 * @ns_super_sem: semaphore for global operations across super block instances 52 * @ns_super_sem: semaphore for global operations across super block instances
52 * @ns_mount_mutex: mutex protecting mount process of nilfs 53 * @ns_mount_mutex: mutex protecting mount process of nilfs
53 * @ns_writer_mutex: mutex protecting ns_writer attach/detach 54 * @ns_writer_sem: semaphore protecting ns_writer attach/detach
54 * @ns_writer_refcount: number of referrers on ns_writer
55 * @ns_current: back pointer to current mount 55 * @ns_current: back pointer to current mount
56 * @ns_sbh: buffer heads of on-disk super blocks 56 * @ns_sbh: buffer heads of on-disk super blocks
57 * @ns_sbp: pointers to super block data 57 * @ns_sbp: pointers to super block data
@@ -100,8 +100,7 @@ struct the_nilfs {
100 struct rw_semaphore ns_sem; 100 struct rw_semaphore ns_sem;
101 struct rw_semaphore ns_super_sem; 101 struct rw_semaphore ns_super_sem;
102 struct mutex ns_mount_mutex; 102 struct mutex ns_mount_mutex;
103 struct mutex ns_writer_mutex; 103 struct rw_semaphore ns_writer_sem;
104 atomic_t ns_writer_refcount;
105 104
106 /* 105 /*
107 * components protected by ns_super_sem 106 * components protected by ns_super_sem
@@ -197,11 +196,26 @@ static inline int nilfs_##name(struct the_nilfs *nilfs) \
197THE_NILFS_FNS(INIT, init) 196THE_NILFS_FNS(INIT, init)
198THE_NILFS_FNS(LOADED, loaded) 197THE_NILFS_FNS(LOADED, loaded)
199THE_NILFS_FNS(DISCONTINUED, discontinued) 198THE_NILFS_FNS(DISCONTINUED, discontinued)
199THE_NILFS_FNS(GC_RUNNING, gc_running)
200 200
201/* Minimum interval of periodical update of superblocks (in seconds) */ 201/* Minimum interval of periodical update of superblocks (in seconds) */
202#define NILFS_SB_FREQ 10 202#define NILFS_SB_FREQ 10
203#define NILFS_ALTSB_FREQ 60 /* spare superblock */ 203#define NILFS_ALTSB_FREQ 60 /* spare superblock */
204 204
205static inline int nilfs_sb_need_update(struct the_nilfs *nilfs)
206{
207 u64 t = get_seconds();
208 return t < nilfs->ns_sbwtime[0] ||
209 t > nilfs->ns_sbwtime[0] + NILFS_SB_FREQ;
210}
211
212static inline int nilfs_altsb_need_update(struct the_nilfs *nilfs)
213{
214 u64 t = get_seconds();
215 struct nilfs_super_block **sbp = nilfs->ns_sbp;
216 return sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ;
217}
218
205void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); 219void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
206struct the_nilfs *find_or_create_nilfs(struct block_device *); 220struct the_nilfs *find_or_create_nilfs(struct block_device *);
207void put_nilfs(struct the_nilfs *); 221void put_nilfs(struct the_nilfs *);
@@ -221,34 +235,21 @@ static inline void get_nilfs(struct the_nilfs *nilfs)
221 atomic_inc(&nilfs->ns_count); 235 atomic_inc(&nilfs->ns_count);
222} 236}
223 237
224static inline struct nilfs_sb_info *nilfs_get_writer(struct the_nilfs *nilfs)
225{
226 if (atomic_inc_and_test(&nilfs->ns_writer_refcount))
227 mutex_lock(&nilfs->ns_writer_mutex);
228 return nilfs->ns_writer;
229}
230
231static inline void nilfs_put_writer(struct the_nilfs *nilfs)
232{
233 if (atomic_add_negative(-1, &nilfs->ns_writer_refcount))
234 mutex_unlock(&nilfs->ns_writer_mutex);
235}
236
237static inline void 238static inline void
238nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) 239nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
239{ 240{
240 mutex_lock(&nilfs->ns_writer_mutex); 241 down_write(&nilfs->ns_writer_sem);
241 nilfs->ns_writer = sbi; 242 nilfs->ns_writer = sbi;
242 mutex_unlock(&nilfs->ns_writer_mutex); 243 up_write(&nilfs->ns_writer_sem);
243} 244}
244 245
245static inline void 246static inline void
246nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) 247nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
247{ 248{
248 mutex_lock(&nilfs->ns_writer_mutex); 249 down_write(&nilfs->ns_writer_sem);
249 if (sbi == nilfs->ns_writer) 250 if (sbi == nilfs->ns_writer)
250 nilfs->ns_writer = NULL; 251 nilfs->ns_writer = NULL;
251 mutex_unlock(&nilfs->ns_writer_mutex); 252 up_write(&nilfs->ns_writer_sem);
252} 253}
253 254
254static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) 255static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 3140a4429af1..4350d4993b18 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2076,14 +2076,6 @@ err_out:
2076 *ppos = pos; 2076 *ppos = pos;
2077 if (cached_page) 2077 if (cached_page)
2078 page_cache_release(cached_page); 2078 page_cache_release(cached_page);
2079 /* For now, when the user asks for O_SYNC, we actually give O_DSYNC. */
2080 if (likely(!status)) {
2081 if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(vi))) {
2082 if (!mapping->a_ops->writepage || !is_sync_kiocb(iocb))
2083 status = generic_osync_inode(vi, mapping,
2084 OSYNC_METADATA|OSYNC_DATA);
2085 }
2086 }
2087 pagevec_lru_add_file(&lru_pvec); 2079 pagevec_lru_add_file(&lru_pvec);
2088 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", 2080 ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
2089 written ? "written" : "status", (unsigned long)written, 2081 written ? "written" : "status", (unsigned long)written,
@@ -2145,8 +2137,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2145 mutex_lock(&inode->i_mutex); 2137 mutex_lock(&inode->i_mutex);
2146 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); 2138 ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
2147 mutex_unlock(&inode->i_mutex); 2139 mutex_unlock(&inode->i_mutex);
2148 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2140 if (ret > 0) {
2149 int err = sync_page_range(inode, mapping, pos, ret); 2141 int err = generic_write_sync(file, pos, ret);
2150 if (err < 0) 2142 if (err < 0)
2151 ret = err; 2143 ret = err;
2152 } 2144 }
@@ -2173,8 +2165,8 @@ static ssize_t ntfs_file_writev(struct file *file, const struct iovec *iov,
2173 if (ret == -EIOCBQUEUED) 2165 if (ret == -EIOCBQUEUED)
2174 ret = wait_on_sync_kiocb(&kiocb); 2166 ret = wait_on_sync_kiocb(&kiocb);
2175 mutex_unlock(&inode->i_mutex); 2167 mutex_unlock(&inode->i_mutex);
2176 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2168 if (ret > 0) {
2177 int err = sync_page_range(inode, mapping, *ppos - ret, ret); 2169 int err = generic_write_sync(file, *ppos - ret, ret);
2178 if (err < 0) 2170 if (err < 0)
2179 ret = err; 2171 ret = err;
2180 } 2172 }
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 23bf68453d7d..1caa0ef0b2bb 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -384,13 +384,12 @@ unm_err_out:
384 * it is dirty in the inode meta data rather than the data page cache of the 384 * it is dirty in the inode meta data rather than the data page cache of the
385 * inode, and thus there are no data pages that need writing out. Therefore, a 385 * inode, and thus there are no data pages that need writing out. Therefore, a
386 * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the 386 * full mark_inode_dirty() is overkill. A mark_inode_dirty_sync(), on the
387 * other hand, is not sufficient, because I_DIRTY_DATASYNC needs to be set to 387 * other hand, is not sufficient, because ->write_inode needs to be called even
388 * ensure ->write_inode is called from generic_osync_inode() and this needs to 388 * in case of fdatasync. This needs to happen or the file data would not
389 * happen or the file data would not necessarily hit the device synchronously, 389 * necessarily hit the device synchronously, even though the vfs inode has the
390 * even though the vfs inode has the O_SYNC flag set. Also, I_DIRTY_DATASYNC 390 * O_SYNC flag set. Also, I_DIRTY_DATASYNC simply "feels" better than just
391 * simply "feels" better than just I_DIRTY_SYNC, since the file data has not 391 * I_DIRTY_SYNC, since the file data has not actually hit the block device yet,
392 * actually hit the block device yet, which is not what I_DIRTY_SYNC on its own 392 * which is not what I_DIRTY_SYNC on its own would suggest.
393 * would suggest.
394 */ 393 */
395void __mark_mft_record_dirty(ntfs_inode *ni) 394void __mark_mft_record_dirty(ntfs_inode *ni)
396{ 395{
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 1c9efb406a96..02bf17808bdc 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -325,6 +325,7 @@ clear_fields:
325} 325}
326 326
327static struct backing_dev_info dlmfs_backing_dev_info = { 327static struct backing_dev_info dlmfs_backing_dev_info = {
328 .name = "ocfs2-dlmfs",
328 .ra_pages = 0, /* No readahead */ 329 .ra_pages = 0, /* No readahead */
329 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 330 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
330}; 331};
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index aa501d3f93f1..221c5e98957b 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1871,8 +1871,7 @@ relock:
1871 goto out_dio; 1871 goto out_dio;
1872 } 1872 }
1873 } else { 1873 } else {
1874 written = generic_file_aio_write_nolock(iocb, iov, nr_segs, 1874 written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
1875 *ppos);
1876 } 1875 }
1877 1876
1878out_dio: 1877out_dio:
@@ -1880,18 +1879,21 @@ out_dio:
1880 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 1879 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
1881 1880
1882 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { 1881 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
1883 /* 1882 ret = filemap_fdatawrite_range(file->f_mapping, pos,
1884 * The generic write paths have handled getting data 1883 pos + count - 1);
1885 * to disk, but since we don't make use of the dirty 1884 if (ret < 0)
1886 * inode list, a manual journal commit is necessary 1885 written = ret;
1887 * here. 1886
1888 */ 1887 if (!ret && (old_size != i_size_read(inode) ||
1889 if (old_size != i_size_read(inode) || 1888 old_clusters != OCFS2_I(inode)->ip_clusters)) {
1890 old_clusters != OCFS2_I(inode)->ip_clusters) {
1891 ret = jbd2_journal_force_commit(osb->journal->j_journal); 1889 ret = jbd2_journal_force_commit(osb->journal->j_journal);
1892 if (ret < 0) 1890 if (ret < 0)
1893 written = ret; 1891 written = ret;
1894 } 1892 }
1893
1894 if (!ret)
1895 ret = filemap_fdatawait_range(file->f_mapping, pos,
1896 pos + count - 1);
1895 } 1897 }
1896 1898
1897 /* 1899 /*
@@ -1991,31 +1993,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1991 1993
1992 if (ret > 0) { 1994 if (ret > 0) {
1993 unsigned long nr_pages; 1995 unsigned long nr_pages;
1996 int err;
1994 1997
1995 *ppos += ret;
1996 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1998 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1997 1999
1998 /* 2000 err = generic_write_sync(out, *ppos, ret);
1999 * If file or inode is SYNC and we actually wrote some data, 2001 if (err)
2000 * sync it. 2002 ret = err;
2001 */ 2003 else
2002 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 2004 *ppos += ret;
2003 int err;
2004
2005 mutex_lock(&inode->i_mutex);
2006 err = ocfs2_rw_lock(inode, 1);
2007 if (err < 0) {
2008 mlog_errno(err);
2009 } else {
2010 err = generic_osync_inode(inode, mapping,
2011 OSYNC_METADATA|OSYNC_DATA);
2012 ocfs2_rw_unlock(inode, 1);
2013 }
2014 mutex_unlock(&inode->i_mutex);
2015 2005
2016 if (err)
2017 ret = err;
2018 }
2019 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 2006 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
2020 } 2007 }
2021 2008
diff --git a/fs/open.c b/fs/open.c
index dd98e8076024..31191bf513e4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -199,7 +199,7 @@ out:
199int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 199int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
200 struct file *filp) 200 struct file *filp)
201{ 201{
202 int err; 202 int ret;
203 struct iattr newattrs; 203 struct iattr newattrs;
204 204
205 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ 205 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
@@ -214,12 +214,14 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
214 } 214 }
215 215
216 /* Remove suid/sgid on truncate too */ 216 /* Remove suid/sgid on truncate too */
217 newattrs.ia_valid |= should_remove_suid(dentry); 217 ret = should_remove_suid(dentry);
218 if (ret)
219 newattrs.ia_valid |= ret | ATTR_FORCE;
218 220
219 mutex_lock(&dentry->d_inode->i_mutex); 221 mutex_lock(&dentry->d_inode->i_mutex);
220 err = notify_change(dentry, &newattrs); 222 ret = notify_change(dentry, &newattrs);
221 mutex_unlock(&dentry->d_inode->i_mutex); 223 mutex_unlock(&dentry->d_inode->i_mutex);
222 return err; 224 return ret;
223} 225}
224 226
225static long do_sys_truncate(const char __user *pathname, loff_t length) 227static long do_sys_truncate(const char __user *pathname, loff_t length)
@@ -957,6 +959,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
957 int error; 959 int error;
958 struct file *f; 960 struct file *f;
959 961
962 validate_creds(cred);
963
960 /* 964 /*
961 * We must always pass in a valid mount pointer. Historically 965 * We must always pass in a valid mount pointer. Historically
962 * callers got away with not passing it, but we must enforce this at 966 * callers got away with not passing it, but we must enforce this at
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ea4e6cb29e13..619ba99dfe39 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -248,11 +248,19 @@ ssize_t part_stat_show(struct device *dev,
248 part_stat_read(p, merges[WRITE]), 248 part_stat_read(p, merges[WRITE]),
249 (unsigned long long)part_stat_read(p, sectors[WRITE]), 249 (unsigned long long)part_stat_read(p, sectors[WRITE]),
250 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), 250 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
251 p->in_flight, 251 part_in_flight(p),
252 jiffies_to_msecs(part_stat_read(p, io_ticks)), 252 jiffies_to_msecs(part_stat_read(p, io_ticks)),
253 jiffies_to_msecs(part_stat_read(p, time_in_queue))); 253 jiffies_to_msecs(part_stat_read(p, time_in_queue)));
254} 254}
255 255
256ssize_t part_inflight_show(struct device *dev,
257 struct device_attribute *attr, char *buf)
258{
259 struct hd_struct *p = dev_to_part(dev);
260
261 return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]);
262}
263
256#ifdef CONFIG_FAIL_MAKE_REQUEST 264#ifdef CONFIG_FAIL_MAKE_REQUEST
257ssize_t part_fail_show(struct device *dev, 265ssize_t part_fail_show(struct device *dev,
258 struct device_attribute *attr, char *buf) 266 struct device_attribute *attr, char *buf)
@@ -281,6 +289,7 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
281static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 289static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
282static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 290static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
283static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 291static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
292static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
284#ifdef CONFIG_FAIL_MAKE_REQUEST 293#ifdef CONFIG_FAIL_MAKE_REQUEST
285static struct device_attribute dev_attr_fail = 294static struct device_attribute dev_attr_fail =
286 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 295 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -292,6 +301,7 @@ static struct attribute *part_attrs[] = {
292 &dev_attr_size.attr, 301 &dev_attr_size.attr,
293 &dev_attr_alignment_offset.attr, 302 &dev_attr_alignment_offset.attr,
294 &dev_attr_stat.attr, 303 &dev_attr_stat.attr,
304 &dev_attr_inflight.attr,
295#ifdef CONFIG_FAIL_MAKE_REQUEST 305#ifdef CONFIG_FAIL_MAKE_REQUEST
296 &dev_attr_fail.attr, 306 &dev_attr_fail.attr,
297#endif 307#endif
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 0ff7566c767c..a7f0110fca4c 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -46,6 +46,7 @@ static const struct super_operations ramfs_ops;
46static const struct inode_operations ramfs_dir_inode_operations; 46static const struct inode_operations ramfs_dir_inode_operations;
47 47
48static struct backing_dev_info ramfs_backing_dev_info = { 48static struct backing_dev_info ramfs_backing_dev_info = {
49 .name = "ramfs",
49 .ra_pages = 0, /* No readahead */ 50 .ra_pages = 0, /* No readahead */
50 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | 51 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
51 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | 52 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
diff --git a/fs/splice.c b/fs/splice.c
index 73766d24f97b..7394e9e17534 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -502,8 +502,10 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
502 len = left; 502 len = left;
503 503
504 ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 504 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
505 if (ret > 0) 505 if (ret > 0) {
506 *ppos += ret; 506 *ppos += ret;
507 file_accessed(in);
508 }
507 509
508 return ret; 510 return ret;
509} 511}
@@ -963,8 +965,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
963 965
964 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 966 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
965 ret = file_remove_suid(out); 967 ret = file_remove_suid(out);
966 if (!ret) 968 if (!ret) {
969 file_update_time(out);
967 ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); 970 ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
971 }
968 mutex_unlock(&inode->i_mutex); 972 mutex_unlock(&inode->i_mutex);
969 } while (ret > 0); 973 } while (ret > 0);
970 splice_from_pipe_end(pipe, &sd); 974 splice_from_pipe_end(pipe, &sd);
@@ -976,25 +980,15 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
976 980
977 if (ret > 0) { 981 if (ret > 0) {
978 unsigned long nr_pages; 982 unsigned long nr_pages;
983 int err;
979 984
980 *ppos += ret;
981 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 985 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
982 986
983 /* 987 err = generic_write_sync(out, *ppos, ret);
984 * If file or inode is SYNC and we actually wrote some data, 988 if (err)
985 * sync it. 989 ret = err;
986 */ 990 else
987 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 991 *ppos += ret;
988 int err;
989
990 mutex_lock(&inode->i_mutex);
991 err = generic_osync_inode(inode, mapping,
992 OSYNC_METADATA|OSYNC_DATA);
993 mutex_unlock(&inode->i_mutex);
994
995 if (err)
996 ret = err;
997 }
998 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 992 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
999 } 993 }
1000 994
diff --git a/fs/super.c b/fs/super.c
index 2761d3e22ed9..9cda337ddae2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -62,9 +62,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
62 s = NULL; 62 s = NULL;
63 goto out; 63 goto out;
64 } 64 }
65 INIT_LIST_HEAD(&s->s_dirty);
66 INIT_LIST_HEAD(&s->s_io);
67 INIT_LIST_HEAD(&s->s_more_io);
68 INIT_LIST_HEAD(&s->s_files); 65 INIT_LIST_HEAD(&s->s_files);
69 INIT_LIST_HEAD(&s->s_instances); 66 INIT_LIST_HEAD(&s->s_instances);
70 INIT_HLIST_HEAD(&s->s_anon); 67 INIT_HLIST_HEAD(&s->s_anon);
@@ -171,7 +168,7 @@ int __put_super_and_need_restart(struct super_block *sb)
171 * Drops a temporary reference, frees superblock if there's no 168 * Drops a temporary reference, frees superblock if there's no
172 * references left. 169 * references left.
173 */ 170 */
174static void put_super(struct super_block *sb) 171void put_super(struct super_block *sb)
175{ 172{
176 spin_lock(&sb_lock); 173 spin_lock(&sb_lock);
177 __put_super(sb); 174 __put_super(sb);
diff --git a/fs/sync.c b/fs/sync.c
index 3422ba61d86d..192340930bb4 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -19,20 +19,22 @@
19 SYNC_FILE_RANGE_WAIT_AFTER) 19 SYNC_FILE_RANGE_WAIT_AFTER)
20 20
21/* 21/*
22 * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) 22 * Do the filesystem syncing work. For simple filesystems
23 * just dirties buffers with inodes so we have to submit IO for these buffers 23 * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to
24 * via __sync_blockdev(). This also speeds up the wait == 1 case since in that 24 * submit IO for these buffers via __sync_blockdev(). This also speeds up the
25 * case write_inode() functions do sync_dirty_buffer() and thus effectively 25 * wait == 1 case since in that case write_inode() functions do
26 * write one block at a time. 26 * sync_dirty_buffer() and thus effectively write one block at a time.
27 */ 27 */
28static int __sync_filesystem(struct super_block *sb, int wait) 28static int __sync_filesystem(struct super_block *sb, int wait)
29{ 29{
30 /* Avoid doing twice syncing and cache pruning for quota sync */ 30 /* Avoid doing twice syncing and cache pruning for quota sync */
31 if (!wait) 31 if (!wait) {
32 writeout_quota_sb(sb, -1); 32 writeout_quota_sb(sb, -1);
33 else 33 writeback_inodes_sb(sb);
34 } else {
34 sync_quota_sb(sb, -1); 35 sync_quota_sb(sb, -1);
35 sync_inodes_sb(sb, wait); 36 sync_inodes_sb(sb);
37 }
36 if (sb->s_op->sync_fs) 38 if (sb->s_op->sync_fs)
37 sb->s_op->sync_fs(sb, wait); 39 sb->s_op->sync_fs(sb, wait);
38 return __sync_blockdev(sb->s_bdev, wait); 40 return __sync_blockdev(sb->s_bdev, wait);
@@ -118,7 +120,7 @@ restart:
118 */ 120 */
119SYSCALL_DEFINE0(sync) 121SYSCALL_DEFINE0(sync)
120{ 122{
121 wakeup_pdflush(0); 123 wakeup_flusher_threads(0);
122 sync_filesystems(0); 124 sync_filesystems(0);
123 sync_filesystems(1); 125 sync_filesystems(1);
124 if (unlikely(laptop_mode)) 126 if (unlikely(laptop_mode))
@@ -176,19 +178,23 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
176} 178}
177 179
178/** 180/**
179 * vfs_fsync - perform a fsync or fdatasync on a file 181 * vfs_fsync_range - helper to sync a range of data & metadata to disk
180 * @file: file to sync 182 * @file: file to sync
181 * @dentry: dentry of @file 183 * @dentry: dentry of @file
182 * @data: only perform a fdatasync operation 184 * @start: offset in bytes of the beginning of data range to sync
185 * @end: offset in bytes of the end of data range (inclusive)
186 * @datasync: perform only datasync
183 * 187 *
184 * Write back data and metadata for @file to disk. If @datasync is 188 * Write back data in range @start..@end and metadata for @file to disk. If
185 * set only metadata needed to access modified file data is written. 189 * @datasync is set only metadata needed to access modified file data is
190 * written.
186 * 191 *
187 * In case this function is called from nfsd @file may be %NULL and 192 * In case this function is called from nfsd @file may be %NULL and
188 * only @dentry is set. This can only happen when the filesystem 193 * only @dentry is set. This can only happen when the filesystem
189 * implements the export_operations API. 194 * implements the export_operations API.
190 */ 195 */
191int vfs_fsync(struct file *file, struct dentry *dentry, int datasync) 196int vfs_fsync_range(struct file *file, struct dentry *dentry, loff_t start,
197 loff_t end, int datasync)
192{ 198{
193 const struct file_operations *fop; 199 const struct file_operations *fop;
194 struct address_space *mapping; 200 struct address_space *mapping;
@@ -212,7 +218,7 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
212 goto out; 218 goto out;
213 } 219 }
214 220
215 ret = filemap_fdatawrite(mapping); 221 ret = filemap_write_and_wait_range(mapping, start, end);
216 222
217 /* 223 /*
218 * We need to protect against concurrent writers, which could cause 224 * We need to protect against concurrent writers, which could cause
@@ -223,12 +229,29 @@ int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
223 if (!ret) 229 if (!ret)
224 ret = err; 230 ret = err;
225 mutex_unlock(&mapping->host->i_mutex); 231 mutex_unlock(&mapping->host->i_mutex);
226 err = filemap_fdatawait(mapping); 232
227 if (!ret)
228 ret = err;
229out: 233out:
230 return ret; 234 return ret;
231} 235}
236EXPORT_SYMBOL(vfs_fsync_range);
237
238/**
239 * vfs_fsync - perform a fsync or fdatasync on a file
240 * @file: file to sync
241 * @dentry: dentry of @file
242 * @datasync: only perform a fdatasync operation
243 *
244 * Write back data and metadata for @file to disk. If @datasync is
245 * set only metadata needed to access modified file data is written.
246 *
247 * In case this function is called from nfsd @file may be %NULL and
248 * only @dentry is set. This can only happen when the filesystem
249 * implements the export_operations API.
250 */
251int vfs_fsync(struct file *file, struct dentry *dentry, int datasync)
252{
253 return vfs_fsync_range(file, dentry, 0, LLONG_MAX, datasync);
254}
232EXPORT_SYMBOL(vfs_fsync); 255EXPORT_SYMBOL(vfs_fsync);
233 256
234static int do_fsync(unsigned int fd, int datasync) 257static int do_fsync(unsigned int fd, int datasync)
@@ -254,6 +277,23 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd)
254 return do_fsync(fd, 1); 277 return do_fsync(fd, 1);
255} 278}
256 279
280/**
281 * generic_write_sync - perform syncing after a write if file / inode is sync
282 * @file: file to which the write happened
283 * @pos: offset where the write started
284 * @count: length of the write
285 *
286 * This is just a simple wrapper about our general syncing function.
287 */
288int generic_write_sync(struct file *file, loff_t pos, loff_t count)
289{
290 if (!(file->f_flags & O_SYNC) && !IS_SYNC(file->f_mapping->host))
291 return 0;
292 return vfs_fsync_range(file, file->f_path.dentry, pos,
293 pos + count - 1, 1);
294}
295EXPORT_SYMBOL(generic_write_sync);
296
257/* 297/*
258 * sys_sync_file_range() permits finely controlled syncing over a segment of 298 * sys_sync_file_range() permits finely controlled syncing over a segment of
259 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is 299 * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 14f2d71ea3ce..0050fc40e8c9 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -760,6 +760,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
760const struct inode_operations sysfs_dir_inode_operations = { 760const struct inode_operations sysfs_dir_inode_operations = {
761 .lookup = sysfs_lookup, 761 .lookup = sysfs_lookup,
762 .setattr = sysfs_setattr, 762 .setattr = sysfs_setattr,
763 .setxattr = sysfs_setxattr,
763}; 764};
764 765
765static void remove_dir(struct sysfs_dirent *sd) 766static void remove_dir(struct sysfs_dirent *sd)
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 555f0ff988df..e28cecf179f5 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -18,6 +18,8 @@
18#include <linux/capability.h> 18#include <linux/capability.h>
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/xattr.h>
22#include <linux/security.h>
21#include "sysfs.h" 23#include "sysfs.h"
22 24
23extern struct super_block * sysfs_sb; 25extern struct super_block * sysfs_sb;
@@ -29,12 +31,14 @@ static const struct address_space_operations sysfs_aops = {
29}; 31};
30 32
31static struct backing_dev_info sysfs_backing_dev_info = { 33static struct backing_dev_info sysfs_backing_dev_info = {
34 .name = "sysfs",
32 .ra_pages = 0, /* No readahead */ 35 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 36 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
34}; 37};
35 38
36static const struct inode_operations sysfs_inode_operations ={ 39static const struct inode_operations sysfs_inode_operations ={
37 .setattr = sysfs_setattr, 40 .setattr = sysfs_setattr,
41 .setxattr = sysfs_setxattr,
38}; 42};
39 43
40int __init sysfs_inode_init(void) 44int __init sysfs_inode_init(void)
@@ -42,18 +46,37 @@ int __init sysfs_inode_init(void)
42 return bdi_init(&sysfs_backing_dev_info); 46 return bdi_init(&sysfs_backing_dev_info);
43} 47}
44 48
49struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
50{
51 struct sysfs_inode_attrs *attrs;
52 struct iattr *iattrs;
53
54 attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
55 if (!attrs)
56 return NULL;
57 iattrs = &attrs->ia_iattr;
58
59 /* assign default attributes */
60 iattrs->ia_mode = sd->s_mode;
61 iattrs->ia_uid = 0;
62 iattrs->ia_gid = 0;
63 iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
64
65 return attrs;
66}
45int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) 67int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
46{ 68{
47 struct inode * inode = dentry->d_inode; 69 struct inode * inode = dentry->d_inode;
48 struct sysfs_dirent * sd = dentry->d_fsdata; 70 struct sysfs_dirent * sd = dentry->d_fsdata;
49 struct iattr * sd_iattr; 71 struct sysfs_inode_attrs *sd_attrs;
72 struct iattr *iattrs;
50 unsigned int ia_valid = iattr->ia_valid; 73 unsigned int ia_valid = iattr->ia_valid;
51 int error; 74 int error;
52 75
53 if (!sd) 76 if (!sd)
54 return -EINVAL; 77 return -EINVAL;
55 78
56 sd_iattr = sd->s_iattr; 79 sd_attrs = sd->s_iattr;
57 80
58 error = inode_change_ok(inode, iattr); 81 error = inode_change_ok(inode, iattr);
59 if (error) 82 if (error)
@@ -65,42 +88,77 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
65 if (error) 88 if (error)
66 return error; 89 return error;
67 90
68 if (!sd_iattr) { 91 if (!sd_attrs) {
69 /* setting attributes for the first time, allocate now */ 92 /* setting attributes for the first time, allocate now */
70 sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); 93 sd_attrs = sysfs_init_inode_attrs(sd);
71 if (!sd_iattr) 94 if (!sd_attrs)
72 return -ENOMEM; 95 return -ENOMEM;
73 /* assign default attributes */ 96 sd->s_iattr = sd_attrs;
74 sd_iattr->ia_mode = sd->s_mode; 97 } else {
75 sd_iattr->ia_uid = 0; 98 /* attributes were changed at least once in past */
76 sd_iattr->ia_gid = 0; 99 iattrs = &sd_attrs->ia_iattr;
77 sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; 100
78 sd->s_iattr = sd_iattr; 101 if (ia_valid & ATTR_UID)
102 iattrs->ia_uid = iattr->ia_uid;
103 if (ia_valid & ATTR_GID)
104 iattrs->ia_gid = iattr->ia_gid;
105 if (ia_valid & ATTR_ATIME)
106 iattrs->ia_atime = timespec_trunc(iattr->ia_atime,
107 inode->i_sb->s_time_gran);
108 if (ia_valid & ATTR_MTIME)
109 iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime,
110 inode->i_sb->s_time_gran);
111 if (ia_valid & ATTR_CTIME)
112 iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime,
113 inode->i_sb->s_time_gran);
114 if (ia_valid & ATTR_MODE) {
115 umode_t mode = iattr->ia_mode;
116
117 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
118 mode &= ~S_ISGID;
119 iattrs->ia_mode = sd->s_mode = mode;
120 }
79 } 121 }
122 return error;
123}
80 124
81 /* attributes were changed atleast once in past */ 125int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
82 126 size_t size, int flags)
83 if (ia_valid & ATTR_UID) 127{
84 sd_iattr->ia_uid = iattr->ia_uid; 128 struct sysfs_dirent *sd = dentry->d_fsdata;
85 if (ia_valid & ATTR_GID) 129 struct sysfs_inode_attrs *iattrs;
86 sd_iattr->ia_gid = iattr->ia_gid; 130 void *secdata;
87 if (ia_valid & ATTR_ATIME) 131 int error;
88 sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime, 132 u32 secdata_len = 0;
89 inode->i_sb->s_time_gran); 133
90 if (ia_valid & ATTR_MTIME) 134 if (!sd)
91 sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime, 135 return -EINVAL;
92 inode->i_sb->s_time_gran); 136 if (!sd->s_iattr)
93 if (ia_valid & ATTR_CTIME) 137 sd->s_iattr = sysfs_init_inode_attrs(sd);
94 sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime, 138 if (!sd->s_iattr)
95 inode->i_sb->s_time_gran); 139 return -ENOMEM;
96 if (ia_valid & ATTR_MODE) { 140
97 umode_t mode = iattr->ia_mode; 141 iattrs = sd->s_iattr;
98 142
99 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) 143 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
100 mode &= ~S_ISGID; 144 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
101 sd_iattr->ia_mode = sd->s_mode = mode; 145 error = security_inode_setsecurity(dentry->d_inode, suffix,
102 } 146 value, size, flags);
147 if (error)
148 goto out;
149 error = security_inode_getsecctx(dentry->d_inode,
150 &secdata, &secdata_len);
151 if (error)
152 goto out;
153 if (iattrs->ia_secdata)
154 security_release_secctx(iattrs->ia_secdata,
155 iattrs->ia_secdata_len);
156 iattrs->ia_secdata = secdata;
157 iattrs->ia_secdata_len = secdata_len;
103 158
159 } else
160 return -EINVAL;
161out:
104 return error; 162 return error;
105} 163}
106 164
@@ -146,6 +204,7 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd)
146static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) 204static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
147{ 205{
148 struct bin_attribute *bin_attr; 206 struct bin_attribute *bin_attr;
207 struct sysfs_inode_attrs *iattrs;
149 208
150 inode->i_private = sysfs_get(sd); 209 inode->i_private = sysfs_get(sd);
151 inode->i_mapping->a_ops = &sysfs_aops; 210 inode->i_mapping->a_ops = &sysfs_aops;
@@ -154,16 +213,20 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
154 inode->i_ino = sd->s_ino; 213 inode->i_ino = sd->s_ino;
155 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); 214 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
156 215
157 if (sd->s_iattr) { 216 iattrs = sd->s_iattr;
217 if (iattrs) {
158 /* sysfs_dirent has non-default attributes 218 /* sysfs_dirent has non-default attributes
159 * get them for the new inode from persistent copy 219 * get them for the new inode from persistent copy
160 * in sysfs_dirent 220 * in sysfs_dirent
161 */ 221 */
162 set_inode_attr(inode, sd->s_iattr); 222 set_inode_attr(inode, &iattrs->ia_iattr);
223 if (iattrs->ia_secdata)
224 security_inode_notifysecctx(inode,
225 iattrs->ia_secdata,
226 iattrs->ia_secdata_len);
163 } else 227 } else
164 set_default_inode_attr(inode, sd->s_mode); 228 set_default_inode_attr(inode, sd->s_mode);
165 229
166
167 /* initialize inode according to type */ 230 /* initialize inode according to type */
168 switch (sysfs_type(sd)) { 231 switch (sysfs_type(sd)) {
169 case SYSFS_DIR: 232 case SYSFS_DIR:
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 1d897ad808e0..c5081ad77026 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -16,6 +16,7 @@
16#include <linux/kobject.h> 16#include <linux/kobject.h>
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mutex.h> 18#include <linux/mutex.h>
19#include <linux/security.h>
19 20
20#include "sysfs.h" 21#include "sysfs.h"
21 22
@@ -209,6 +210,7 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co
209} 210}
210 211
211const struct inode_operations sysfs_symlink_inode_operations = { 212const struct inode_operations sysfs_symlink_inode_operations = {
213 .setxattr = sysfs_setxattr,
212 .readlink = generic_readlink, 214 .readlink = generic_readlink,
213 .follow_link = sysfs_follow_link, 215 .follow_link = sysfs_follow_link,
214 .put_link = sysfs_put_link, 216 .put_link = sysfs_put_link,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3fa0d98481e2..af4c4e7482ac 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,8 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/fs.h>
12
11struct sysfs_open_dirent; 13struct sysfs_open_dirent;
12 14
13/* type-specific structures for sysfs_dirent->s_* union members */ 15/* type-specific structures for sysfs_dirent->s_* union members */
@@ -31,6 +33,12 @@ struct sysfs_elem_bin_attr {
31 struct hlist_head buffers; 33 struct hlist_head buffers;
32}; 34};
33 35
36struct sysfs_inode_attrs {
37 struct iattr ia_iattr;
38 void *ia_secdata;
39 u32 ia_secdata_len;
40};
41
34/* 42/*
35 * sysfs_dirent - the building block of sysfs hierarchy. Each and 43 * sysfs_dirent - the building block of sysfs hierarchy. Each and
36 * every sysfs node is represented by single sysfs_dirent. 44 * every sysfs node is represented by single sysfs_dirent.
@@ -56,7 +64,7 @@ struct sysfs_dirent {
56 unsigned int s_flags; 64 unsigned int s_flags;
57 ino_t s_ino; 65 ino_t s_ino;
58 umode_t s_mode; 66 umode_t s_mode;
59 struct iattr *s_iattr; 67 struct sysfs_inode_attrs *s_iattr;
60}; 68};
61 69
62#define SD_DEACTIVATED_BIAS INT_MIN 70#define SD_DEACTIVATED_BIAS INT_MIN
@@ -148,6 +156,8 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
148struct inode *sysfs_get_inode(struct sysfs_dirent *sd); 156struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
149void sysfs_delete_inode(struct inode *inode); 157void sysfs_delete_inode(struct inode *inode);
150int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 158int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
159int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
160 size_t size, int flags);
151int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); 161int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
152int sysfs_inode_init(void); 162int sysfs_inode_init(void);
153 163
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index eaf6d891d46f..1c8991b0db13 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -65,26 +65,14 @@
65static int shrink_liability(struct ubifs_info *c, int nr_to_write) 65static int shrink_liability(struct ubifs_info *c, int nr_to_write)
66{ 66{
67 int nr_written; 67 int nr_written;
68 struct writeback_control wbc = {
69 .sync_mode = WB_SYNC_NONE,
70 .range_end = LLONG_MAX,
71 .nr_to_write = nr_to_write,
72 };
73
74 generic_sync_sb_inodes(c->vfs_sb, &wbc);
75 nr_written = nr_to_write - wbc.nr_to_write;
76 68
69 nr_written = writeback_inodes_sb(c->vfs_sb);
77 if (!nr_written) { 70 if (!nr_written) {
78 /* 71 /*
79 * Re-try again but wait on pages/inodes which are being 72 * Re-try again but wait on pages/inodes which are being
80 * written-back concurrently (e.g., by pdflush). 73 * written-back concurrently (e.g., by pdflush).
81 */ 74 */
82 memset(&wbc, 0, sizeof(struct writeback_control)); 75 nr_written = sync_inodes_sb(c->vfs_sb);
83 wbc.sync_mode = WB_SYNC_ALL;
84 wbc.range_end = LLONG_MAX;
85 wbc.nr_to_write = nr_to_write;
86 generic_sync_sb_inodes(c->vfs_sb, &wbc);
87 nr_written = nr_to_write - wbc.nr_to_write;
88 } 76 }
89 77
90 dbg_budg("%d pages were written back", nr_written); 78 dbg_budg("%d pages were written back", nr_written);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 26d2e0d80465..51763aa8f4de 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -438,12 +438,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
438{ 438{
439 int i, err; 439 int i, err;
440 struct ubifs_info *c = sb->s_fs_info; 440 struct ubifs_info *c = sb->s_fs_info;
441 struct writeback_control wbc = {
442 .sync_mode = WB_SYNC_ALL,
443 .range_start = 0,
444 .range_end = LLONG_MAX,
445 .nr_to_write = LONG_MAX,
446 };
447 441
448 /* 442 /*
449 * Zero @wait is just an advisory thing to help the file system shove 443 * Zero @wait is just an advisory thing to help the file system shove
@@ -462,7 +456,7 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
462 * the user be able to get more accurate results of 'statfs()' after 456 * the user be able to get more accurate results of 'statfs()' after
463 * they synchronize the file system. 457 * they synchronize the file system.
464 */ 458 */
465 generic_sync_sb_inodes(sb, &wbc); 459 sync_inodes_sb(sb);
466 460
467 /* 461 /*
468 * Synchronize write buffers, because 'ubifs_run_commit()' does not 462 * Synchronize write buffers, because 'ubifs_run_commit()' does not
@@ -1971,6 +1965,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1971 * 1965 *
1972 * Read-ahead will be disabled because @c->bdi.ra_pages is 0. 1966 * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
1973 */ 1967 */
1968 c->bdi.name = "ubifs",
1974 c->bdi.capabilities = BDI_CAP_MAP_COPY; 1969 c->bdi.capabilities = BDI_CAP_MAP_COPY;
1975 c->bdi.unplug_io_fn = default_unplug_io_fn; 1970 c->bdi.unplug_io_fn = default_unplug_io_fn;
1976 err = bdi_init(&c->bdi); 1971 err = bdi_init(&c->bdi);
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 1d2c570704c8..2ffdb6733af1 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -18,59 +18,6 @@
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/buffer_head.h> 19#include <linux/buffer_head.h>
20 20
21#if 0
22static uint8_t *udf_filead_read(struct inode *dir, uint8_t *tmpad,
23 uint8_t ad_size, struct kernel_lb_addr fe_loc,
24 int *pos, int *offset, struct buffer_head **bh,
25 int *error)
26{
27 int loffset = *offset;
28 int block;
29 uint8_t *ad;
30 int remainder;
31
32 *error = 0;
33
34 ad = (uint8_t *)(*bh)->b_data + *offset;
35 *offset += ad_size;
36
37 if (!ad) {
38 brelse(*bh);
39 *error = 1;
40 return NULL;
41 }
42
43 if (*offset == dir->i_sb->s_blocksize) {
44 brelse(*bh);
45 block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos);
46 if (!block)
47 return NULL;
48 *bh = udf_tread(dir->i_sb, block);
49 if (!*bh)
50 return NULL;
51 } else if (*offset > dir->i_sb->s_blocksize) {
52 ad = tmpad;
53
54 remainder = dir->i_sb->s_blocksize - loffset;
55 memcpy((uint8_t *)ad, (*bh)->b_data + loffset, remainder);
56
57 brelse(*bh);
58 block = udf_get_lb_pblock(dir->i_sb, fe_loc, ++*pos);
59 if (!block)
60 return NULL;
61 (*bh) = udf_tread(dir->i_sb, block);
62 if (!*bh)
63 return NULL;
64
65 memcpy((uint8_t *)ad + remainder, (*bh)->b_data,
66 ad_size - remainder);
67 *offset = ad_size - remainder;
68 }
69
70 return ad;
71}
72#endif
73
74struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, 21struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
75 struct udf_fileident_bh *fibh, 22 struct udf_fileident_bh *fibh,
76 struct fileIdentDesc *cfi, 23 struct fileIdentDesc *cfi,
@@ -248,39 +195,6 @@ struct fileIdentDesc *udf_get_fileident(void *buffer, int bufsize, int *offset)
248 return fi; 195 return fi;
249} 196}
250 197
251#if 0
252static struct extent_ad *udf_get_fileextent(void *buffer, int bufsize, int *offset)
253{
254 struct extent_ad *ext;
255 struct fileEntry *fe;
256 uint8_t *ptr;
257
258 if ((!buffer) || (!offset)) {
259 printk(KERN_ERR "udf: udf_get_fileextent() invalidparms\n");
260 return NULL;
261 }
262
263 fe = (struct fileEntry *)buffer;
264
265 if (fe->descTag.tagIdent != cpu_to_le16(TAG_IDENT_FE)) {
266 udf_debug("0x%x != TAG_IDENT_FE\n",
267 le16_to_cpu(fe->descTag.tagIdent));
268 return NULL;
269 }
270
271 ptr = (uint8_t *)(fe->extendedAttr) +
272 le32_to_cpu(fe->lengthExtendedAttr);
273
274 if ((*offset > 0) && (*offset < le32_to_cpu(fe->lengthAllocDescs)))
275 ptr += *offset;
276
277 ext = (struct extent_ad *)ptr;
278
279 *offset = *offset + sizeof(struct extent_ad);
280 return ext;
281}
282#endif
283
284struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset, 198struct short_ad *udf_get_fileshortad(uint8_t *ptr, int maxoffset, uint32_t *offset,
285 int inc) 199 int inc)
286{ 200{
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 7464305382b5..b80cbd78833c 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -193,9 +193,11 @@ int udf_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
193static int udf_release_file(struct inode *inode, struct file *filp) 193static int udf_release_file(struct inode *inode, struct file *filp)
194{ 194{
195 if (filp->f_mode & FMODE_WRITE) { 195 if (filp->f_mode & FMODE_WRITE) {
196 mutex_lock(&inode->i_mutex);
196 lock_kernel(); 197 lock_kernel();
197 udf_discard_prealloc(inode); 198 udf_discard_prealloc(inode);
198 unlock_kernel(); 199 unlock_kernel();
200 mutex_unlock(&inode->i_mutex);
199 } 201 }
200 return 0; 202 return 0;
201} 203}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index e7533f785636..6d24c2c63f93 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -90,19 +90,16 @@ no_delete:
90} 90}
91 91
92/* 92/*
93 * If we are going to release inode from memory, we discard preallocation and 93 * If we are going to release inode from memory, we truncate last inode extent
94 * truncate last inode extent to proper length. We could use drop_inode() but 94 * to proper length. We could use drop_inode() but it's called under inode_lock
95 * it's called under inode_lock and thus we cannot mark inode dirty there. We 95 * and thus we cannot mark inode dirty there. We use clear_inode() but we have
96 * use clear_inode() but we have to make sure to write inode as it's not written 96 * to make sure to write inode as it's not written automatically.
97 * automatically.
98 */ 97 */
99void udf_clear_inode(struct inode *inode) 98void udf_clear_inode(struct inode *inode)
100{ 99{
101 struct udf_inode_info *iinfo; 100 struct udf_inode_info *iinfo;
102 if (!(inode->i_sb->s_flags & MS_RDONLY)) { 101 if (!(inode->i_sb->s_flags & MS_RDONLY)) {
103 lock_kernel(); 102 lock_kernel();
104 /* Discard preallocation for directories, symlinks, etc. */
105 udf_discard_prealloc(inode);
106 udf_truncate_tail_extent(inode); 103 udf_truncate_tail_extent(inode);
107 unlock_kernel(); 104 unlock_kernel();
108 write_inode_now(inode, 0); 105 write_inode_now(inode, 0);
@@ -664,8 +661,12 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block,
664 udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum); 661 udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum);
665 662
666#ifdef UDF_PREALLOCATE 663#ifdef UDF_PREALLOCATE
667 /* preallocate blocks */ 664 /* We preallocate blocks only for regular files. It also makes sense
668 udf_prealloc_extents(inode, c, lastblock, laarr, &endnum); 665 * for directories but there's a problem when to drop the
666 * preallocation. We might use some delayed work for that but I feel
667 * it's overengineering for a filesystem like UDF. */
668 if (S_ISREG(inode->i_mode))
669 udf_prealloc_extents(inode, c, lastblock, laarr, &endnum);
669#endif 670#endif
670 671
671 /* merge any continuous blocks in laarr */ 672 /* merge any continuous blocks in laarr */
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 1b88fd5df05d..43e24a3b8e10 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -36,14 +36,10 @@ unsigned int udf_get_last_session(struct super_block *sb)
36 ms_info.addr_format = CDROM_LBA; 36 ms_info.addr_format = CDROM_LBA;
37 i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info); 37 i = ioctl_by_bdev(bdev, CDROMMULTISESSION, (unsigned long)&ms_info);
38 38
39#define WE_OBEY_THE_WRITTEN_STANDARDS 1
40
41 if (i == 0) { 39 if (i == 0) {
42 udf_debug("XA disk: %s, vol_desc_start=%d\n", 40 udf_debug("XA disk: %s, vol_desc_start=%d\n",
43 (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba); 41 (ms_info.xa_flag ? "yes" : "no"), ms_info.addr.lba);
44#if WE_OBEY_THE_WRITTEN_STANDARDS
45 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ 42 if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */
46#endif
47 vol_desc_start = ms_info.addr.lba; 43 vol_desc_start = ms_info.addr.lba;
48 } else { 44 } else {
49 udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i); 45 udf_debug("CDROMMULTISESSION not supported: rc=%d\n", i);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 6a29fa34c478..21dad8c608f9 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -943,7 +943,6 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
943 pc->componentType = 1; 943 pc->componentType = 1;
944 pc->lengthComponentIdent = 0; 944 pc->lengthComponentIdent = 0;
945 pc->componentFileVersionNum = 0; 945 pc->componentFileVersionNum = 0;
946 pc += sizeof(struct pathComponent);
947 elen += sizeof(struct pathComponent); 946 elen += sizeof(struct pathComponent);
948 } 947 }
949 948
diff --git a/fs/xattr.c b/fs/xattr.c
index 1c3d0af59ddf..6d4f6d3449fb 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -66,22 +66,28 @@ xattr_permission(struct inode *inode, const char *name, int mask)
66 return inode_permission(inode, mask); 66 return inode_permission(inode, mask);
67} 67}
68 68
69int 69/**
70vfs_setxattr(struct dentry *dentry, const char *name, const void *value, 70 * __vfs_setxattr_noperm - perform setxattr operation without performing
71 size_t size, int flags) 71 * permission checks.
72 *
73 * @dentry - object to perform setxattr on
74 * @name - xattr name to set
75 * @value - value to set @name to
76 * @size - size of @value
77 * @flags - flags to pass into filesystem operations
78 *
79 * returns the result of the internal setxattr or setsecurity operations.
80 *
81 * This function requires the caller to lock the inode's i_mutex before it
82 * is executed. It also assumes that the caller will make the appropriate
83 * permission checks.
84 */
85int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
86 const void *value, size_t size, int flags)
72{ 87{
73 struct inode *inode = dentry->d_inode; 88 struct inode *inode = dentry->d_inode;
74 int error; 89 int error = -EOPNOTSUPP;
75
76 error = xattr_permission(inode, name, MAY_WRITE);
77 if (error)
78 return error;
79 90
80 mutex_lock(&inode->i_mutex);
81 error = security_inode_setxattr(dentry, name, value, size, flags);
82 if (error)
83 goto out;
84 error = -EOPNOTSUPP;
85 if (inode->i_op->setxattr) { 91 if (inode->i_op->setxattr) {
86 error = inode->i_op->setxattr(dentry, name, value, size, flags); 92 error = inode->i_op->setxattr(dentry, name, value, size, flags);
87 if (!error) { 93 if (!error) {
@@ -97,6 +103,29 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
97 if (!error) 103 if (!error)
98 fsnotify_xattr(dentry); 104 fsnotify_xattr(dentry);
99 } 105 }
106
107 return error;
108}
109
110
111int
112vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
113 size_t size, int flags)
114{
115 struct inode *inode = dentry->d_inode;
116 int error;
117
118 error = xattr_permission(inode, name, MAY_WRITE);
119 if (error)
120 return error;
121
122 mutex_lock(&inode->i_mutex);
123 error = security_inode_setxattr(dentry, name, value, size, flags);
124 if (error)
125 goto out;
126
127 error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
128
100out: 129out:
101 mutex_unlock(&inode->i_mutex); 130 mutex_unlock(&inode->i_mutex);
102 return error; 131 return error;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 8070b34cc287..6c32f1d63d8c 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -485,14 +485,6 @@ xfs_vn_put_link(
485} 485}
486 486
487STATIC int 487STATIC int
488xfs_vn_permission(
489 struct inode *inode,
490 int mask)
491{
492 return generic_permission(inode, mask, xfs_check_acl);
493}
494
495STATIC int
496xfs_vn_getattr( 488xfs_vn_getattr(
497 struct vfsmount *mnt, 489 struct vfsmount *mnt,
498 struct dentry *dentry, 490 struct dentry *dentry,
@@ -696,7 +688,7 @@ xfs_vn_fiemap(
696} 688}
697 689
698static const struct inode_operations xfs_inode_operations = { 690static const struct inode_operations xfs_inode_operations = {
699 .permission = xfs_vn_permission, 691 .check_acl = xfs_check_acl,
700 .truncate = xfs_vn_truncate, 692 .truncate = xfs_vn_truncate,
701 .getattr = xfs_vn_getattr, 693 .getattr = xfs_vn_getattr,
702 .setattr = xfs_vn_setattr, 694 .setattr = xfs_vn_setattr,
@@ -724,7 +716,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
724 .rmdir = xfs_vn_unlink, 716 .rmdir = xfs_vn_unlink,
725 .mknod = xfs_vn_mknod, 717 .mknod = xfs_vn_mknod,
726 .rename = xfs_vn_rename, 718 .rename = xfs_vn_rename,
727 .permission = xfs_vn_permission, 719 .check_acl = xfs_check_acl,
728 .getattr = xfs_vn_getattr, 720 .getattr = xfs_vn_getattr,
729 .setattr = xfs_vn_setattr, 721 .setattr = xfs_vn_setattr,
730 .setxattr = generic_setxattr, 722 .setxattr = generic_setxattr,
@@ -749,7 +741,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
749 .rmdir = xfs_vn_unlink, 741 .rmdir = xfs_vn_unlink,
750 .mknod = xfs_vn_mknod, 742 .mknod = xfs_vn_mknod,
751 .rename = xfs_vn_rename, 743 .rename = xfs_vn_rename,
752 .permission = xfs_vn_permission, 744 .check_acl = xfs_check_acl,
753 .getattr = xfs_vn_getattr, 745 .getattr = xfs_vn_getattr,
754 .setattr = xfs_vn_setattr, 746 .setattr = xfs_vn_setattr,
755 .setxattr = generic_setxattr, 747 .setxattr = generic_setxattr,
@@ -762,7 +754,7 @@ static const struct inode_operations xfs_symlink_inode_operations = {
762 .readlink = generic_readlink, 754 .readlink = generic_readlink,
763 .follow_link = xfs_vn_follow_link, 755 .follow_link = xfs_vn_follow_link,
764 .put_link = xfs_vn_put_link, 756 .put_link = xfs_vn_put_link,
765 .permission = xfs_vn_permission, 757 .check_acl = xfs_check_acl,
766 .getattr = xfs_vn_getattr, 758 .getattr = xfs_vn_getattr,
767 .setattr = xfs_vn_setattr, 759 .setattr = xfs_vn_setattr,
768 .setxattr = generic_setxattr, 760 .setxattr = generic_setxattr,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 7078974a6eee..fde63a3c4ecc 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -817,7 +817,8 @@ write_retry:
817 xfs_iunlock(xip, iolock); 817 xfs_iunlock(xip, iolock);
818 if (need_i_mutex) 818 if (need_i_mutex)
819 mutex_unlock(&inode->i_mutex); 819 mutex_unlock(&inode->i_mutex);
820 error2 = sync_page_range(inode, mapping, pos, ret); 820 error2 = filemap_write_and_wait_range(mapping, pos,
821 pos + ret - 1);
821 if (!error) 822 if (!error)
822 error = error2; 823 error = error2;
823 if (need_i_mutex) 824 if (need_i_mutex)