Diffstat (limited to 'fs')
-rw-r--r--  fs/binfmt_elf.c | 28
-rw-r--r--  fs/btrfs/disk-io.c | 1
-rw-r--r--  fs/buffer.c | 2
-rw-r--r--  fs/char_dev.c | 40
-rw-r--r--  fs/compat.c | 17
-rw-r--r--  fs/configfs/inode.c | 1
-rw-r--r--  fs/dcache.c | 1
-rw-r--r--  fs/exec.c | 63
-rw-r--r--  fs/ext2/acl.c | 8
-rw-r--r--  fs/ext2/acl.h | 4
-rw-r--r--  fs/ext2/file.c | 2
-rw-r--r--  fs/ext2/namei.c | 8
-rw-r--r--  fs/ext3/acl.c | 8
-rw-r--r--  fs/ext3/acl.h | 4
-rw-r--r--  fs/ext3/file.c | 2
-rw-r--r--  fs/ext3/namei.c | 4
-rw-r--r--  fs/ext4/acl.c | 8
-rw-r--r--  fs/ext4/acl.h | 4
-rw-r--r--  fs/ext4/file.c | 2
-rw-r--r--  fs/ext4/namei.c | 4
-rw-r--r--  fs/fs-writeback.c | 1065
-rw-r--r--  fs/fuse/inode.c | 1
-rw-r--r--  fs/hugetlbfs/inode.c | 1
-rw-r--r--  fs/jffs2/acl.c | 7
-rw-r--r--  fs/jffs2/acl.h | 4
-rw-r--r--  fs/jffs2/dir.c | 2
-rw-r--r--  fs/jffs2/file.c | 2
-rw-r--r--  fs/jffs2/symlink.c | 2
-rw-r--r--  fs/jffs2/wbuf.c | 10
-rw-r--r--  fs/jfs/acl.c | 7
-rw-r--r--  fs/jfs/file.c | 2
-rw-r--r--  fs/jfs/jfs_acl.h | 2
-rw-r--r--  fs/jfs/namei.c | 2
-rw-r--r--  fs/lockd/host.c | 14
-rw-r--r--  fs/lockd/mon.c | 44
-rw-r--r--  fs/locks.c | 4
-rw-r--r--  fs/namei.c | 110
-rw-r--r--  fs/nfs/Makefile | 3
-rw-r--r--  fs/nfs/cache_lib.c | 140
-rw-r--r--  fs/nfs/cache_lib.h | 27
-rw-r--r--  fs/nfs/callback.c | 26
-rw-r--r--  fs/nfs/client.c | 16
-rw-r--r--  fs/nfs/direct.c | 3
-rw-r--r--  fs/nfs/dns_resolve.c | 335
-rw-r--r--  fs/nfs/dns_resolve.h | 14
-rw-r--r--  fs/nfs/file.c | 49
-rw-r--r--  fs/nfs/idmap.c | 6
-rw-r--r--  fs/nfs/inode.c | 100
-rw-r--r--  fs/nfs/internal.h | 39
-rw-r--r--  fs/nfs/mount_clnt.c | 83
-rw-r--r--  fs/nfs/nfs3proc.c | 1
-rw-r--r--  fs/nfs/nfs4namespace.c | 24
-rw-r--r--  fs/nfs/nfs4proc.c | 40
-rw-r--r--  fs/nfs/nfs4xdr.c | 1460
-rw-r--r--  fs/nfs/super.c | 451
-rw-r--r--  fs/nfs/write.c | 91
-rw-r--r--  fs/nfsd/auth.c | 4
-rw-r--r--  fs/nfsd/export.c | 14
-rw-r--r--  fs/nfsd/nfs4idmap.c | 20
-rw-r--r--  fs/nfsd/nfsctl.c | 21
-rw-r--r--  fs/nfsd/nfssvc.c | 2
-rw-r--r--  fs/nfsd/vfs.c | 3
-rw-r--r--  fs/nilfs2/btnode.c | 2
-rw-r--r--  fs/ocfs2/aops.c | 4
-rw-r--r--  fs/ocfs2/dcache.c | 11
-rw-r--r--  fs/ocfs2/dlm/dlmfs.c | 1
-rw-r--r--  fs/open.c | 12
-rw-r--r--  fs/ramfs/inode.c | 1
-rw-r--r--  fs/super.c | 5
-rw-r--r--  fs/sync.c | 20
-rw-r--r--  fs/sysfs/dir.c | 1
-rw-r--r--  fs/sysfs/inode.c | 135
-rw-r--r--  fs/sysfs/symlink.c | 2
-rw-r--r--  fs/sysfs/sysfs.h | 12
-rw-r--r--  fs/ubifs/budget.c | 16
-rw-r--r--  fs/ubifs/super.c | 9
-rw-r--r--  fs/xattr.c | 55
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c | 2
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 16
79 files changed, 3225 insertions(+), 1541 deletions(-)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index b7c1603cd4bd..7c1e65d54872 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -501,22 +501,22 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
501 } 501 }
502 } 502 }
503 503
504 /* 504 if (last_bss > elf_bss) {
505 * Now fill out the bss section. First pad the last page up 505 /*
506 * to the page boundary, and then perform a mmap to make sure 506 * Now fill out the bss section. First pad the last page up
507 * that there are zero-mapped pages up to and including the 507 * to the page boundary, and then perform a mmap to make sure
508 * last bss page. 508 * that there are zero-mapped pages up to and including the
509 */ 509 * last bss page.
510 if (padzero(elf_bss)) { 510 */
511 error = -EFAULT; 511 if (padzero(elf_bss)) {
512 goto out_close; 512 error = -EFAULT;
513 } 513 goto out_close;
514 }
514 515
515 /* What we have mapped so far */ 516 /* What we have mapped so far */
516 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); 517 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
517 518
518 /* Map the last of the bss segment */ 519 /* Map the last of the bss segment */
519 if (last_bss > elf_bss) {
520 down_write(&current->mm->mmap_sem); 520 down_write(&current->mm->mmap_sem);
521 error = do_brk(elf_bss, last_bss - elf_bss); 521 error = do_brk(elf_bss, last_bss - elf_bss);
522 up_write(&current->mm->mmap_sem); 522 up_write(&current->mm->mmap_sem);
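The change above narrows the bss handling in load_elf_interp(): both the padzero() zero-fill and the do_brk() mapping are now skipped when the interpreter has no bss to set up (last_bss <= elf_bss). Because the side-by-side rendering is hard to follow, here is the new-side code reassembled as a plain listing; treat it as a readability sketch rather than a verbatim copy of fs/binfmt_elf.c, and note that the do_brk() error check that follows the hunk is assumed to stay inside the new block.

	if (last_bss > elf_bss) {
		/*
		 * Now fill out the bss section.  First pad the last page up
		 * to the page boundary, and then perform a mmap to make sure
		 * that there are zero-mapped pages up to and including the
		 * last bss page.
		 */
		if (padzero(elf_bss)) {
			error = -EFAULT;
			goto out_close;
		}

		/* What we have mapped so far */
		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

		/* Map the last of the bss segment */
		down_write(&current->mm->mmap_sem);
		error = do_brk(elf_bss, last_bss - elf_bss);
		up_write(&current->mm->mmap_sem);
		if (BAD_ADDR(error))	/* assumed: this check moves inside the block */
			goto out_close;
	}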
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e83be2e4602c..15831d5c7367 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1352,6 +1352,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
1352{ 1352{
1353 int err; 1353 int err;
1354 1354
1355 bdi->name = "btrfs";
1355 bdi->capabilities = BDI_CAP_MAP_COPY; 1356 bdi->capabilities = BDI_CAP_MAP_COPY;
1356 err = bdi_init(bdi); 1357 err = bdi_init(bdi);
1357 if (err) 1358 if (err)
diff --git a/fs/buffer.c b/fs/buffer.c
index 28f320fac4d4..90a98865b0cc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -281,7 +281,7 @@ static void free_more_memory(void)
281 struct zone *zone; 281 struct zone *zone;
282 int nid; 282 int nid;
283 283
284 wakeup_pdflush(1024); 284 wakeup_flusher_threads(1024);
285 yield(); 285 yield();
286 286
287 for_each_online_node(nid) { 287 for_each_online_node(nid) {
diff --git a/fs/char_dev.c b/fs/char_dev.c
index a173551e19d7..3cbc57f932d2 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -31,6 +31,7 @@
31 * - no readahead or I/O queue unplugging required 31 * - no readahead or I/O queue unplugging required
32 */ 32 */
33struct backing_dev_info directly_mappable_cdev_bdi = { 33struct backing_dev_info directly_mappable_cdev_bdi = {
34 .name = "char",
34 .capabilities = ( 35 .capabilities = (
35#ifdef CONFIG_MMU 36#ifdef CONFIG_MMU
36 /* permit private copies of the data to be taken */ 37 /* permit private copies of the data to be taken */
@@ -237,8 +238,10 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
237} 238}
238 239
239/** 240/**
240 * register_chrdev() - Register a major number for character devices. 241 * __register_chrdev() - create and register a cdev occupying a range of minors
241 * @major: major device number or 0 for dynamic allocation 242 * @major: major device number or 0 for dynamic allocation
243 * @baseminor: first of the requested range of minor numbers
244 * @count: the number of minor numbers required
242 * @name: name of this range of devices 245 * @name: name of this range of devices
243 * @fops: file operations associated with this devices 246 * @fops: file operations associated with this devices
244 * 247 *
@@ -254,19 +257,17 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count,
254 * /dev. It only helps to keep track of the different owners of devices. If 257 * /dev. It only helps to keep track of the different owners of devices. If
255 * your module name has only one type of devices it's ok to use e.g. the name 258 * your module name has only one type of devices it's ok to use e.g. the name
256 * of the module here. 259 * of the module here.
257 *
258 * This function registers a range of 256 minor numbers. The first minor number
259 * is 0.
260 */ 260 */
261int register_chrdev(unsigned int major, const char *name, 261int __register_chrdev(unsigned int major, unsigned int baseminor,
262 const struct file_operations *fops) 262 unsigned int count, const char *name,
263 const struct file_operations *fops)
263{ 264{
264 struct char_device_struct *cd; 265 struct char_device_struct *cd;
265 struct cdev *cdev; 266 struct cdev *cdev;
266 char *s; 267 char *s;
267 int err = -ENOMEM; 268 int err = -ENOMEM;
268 269
269 cd = __register_chrdev_region(major, 0, 256, name); 270 cd = __register_chrdev_region(major, baseminor, count, name);
270 if (IS_ERR(cd)) 271 if (IS_ERR(cd))
271 return PTR_ERR(cd); 272 return PTR_ERR(cd);
272 273
@@ -280,7 +281,7 @@ int register_chrdev(unsigned int major, const char *name,
280 for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) 281 for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/'))
281 *s = '!'; 282 *s = '!';
282 283
283 err = cdev_add(cdev, MKDEV(cd->major, 0), 256); 284 err = cdev_add(cdev, MKDEV(cd->major, baseminor), count);
284 if (err) 285 if (err)
285 goto out; 286 goto out;
286 287
@@ -290,7 +291,7 @@ int register_chrdev(unsigned int major, const char *name,
290out: 291out:
291 kobject_put(&cdev->kobj); 292 kobject_put(&cdev->kobj);
292out2: 293out2:
293 kfree(__unregister_chrdev_region(cd->major, 0, 256)); 294 kfree(__unregister_chrdev_region(cd->major, baseminor, count));
294 return err; 295 return err;
295} 296}
296 297
@@ -316,10 +317,23 @@ void unregister_chrdev_region(dev_t from, unsigned count)
316 } 317 }
317} 318}
318 319
319void unregister_chrdev(unsigned int major, const char *name) 320/**
321 * __unregister_chrdev - unregister and destroy a cdev
322 * @major: major device number
323 * @baseminor: first of the range of minor numbers
324 * @count: the number of minor numbers this cdev is occupying
325 * @name: name of this range of devices
326 *
327 * Unregister and destroy the cdev occupying the region described by
328 * @major, @baseminor and @count. This function undoes what
329 * __register_chrdev() did.
330 */
331void __unregister_chrdev(unsigned int major, unsigned int baseminor,
332 unsigned int count, const char *name)
320{ 333{
321 struct char_device_struct *cd; 334 struct char_device_struct *cd;
322 cd = __unregister_chrdev_region(major, 0, 256); 335
336 cd = __unregister_chrdev_region(major, baseminor, count);
323 if (cd && cd->cdev) 337 if (cd && cd->cdev)
324 cdev_del(cd->cdev); 338 cdev_del(cd->cdev);
325 kfree(cd); 339 kfree(cd);
@@ -568,6 +582,6 @@ EXPORT_SYMBOL(cdev_alloc);
568EXPORT_SYMBOL(cdev_del); 582EXPORT_SYMBOL(cdev_del);
569EXPORT_SYMBOL(cdev_add); 583EXPORT_SYMBOL(cdev_add);
570EXPORT_SYMBOL(cdev_index); 584EXPORT_SYMBOL(cdev_index);
571EXPORT_SYMBOL(register_chrdev); 585EXPORT_SYMBOL(__register_chrdev);
572EXPORT_SYMBOL(unregister_chrdev); 586EXPORT_SYMBOL(__unregister_chrdev);
573EXPORT_SYMBOL(directly_mappable_cdev_bdi); 587EXPORT_SYMBOL(directly_mappable_cdev_bdi);
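register_chrdev() used to claim a fixed range of 256 minors starting at 0; the new __register_chrdev()/__unregister_chrdev() pair takes an explicit baseminor and count, as the updated kernel-doc above describes. The old entry points are expected to survive as thin wrappers on the header side (include/linux/fs.h, which is outside this fs/-only diffstat); the sketch below shows how the old API maps onto the new one and is not a quote of the actual header change.

/* presumed include/linux/fs.h wrappers, preserving the old 0..255 minor range */
static inline int register_chrdev(unsigned int major, const char *name,
				  const struct file_operations *fops)
{
	return __register_chrdev(major, 0, 256, name, fops);
}

static inline void unregister_chrdev(unsigned int major, const char *name)
{
	__unregister_chrdev(major, 0, 256, name);
}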
diff --git a/fs/compat.c b/fs/compat.c
index 94502dab972a..6d6f98fe64a0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1485,20 +1485,15 @@ int compat_do_execve(char * filename,
1485 if (!bprm) 1485 if (!bprm)
1486 goto out_files; 1486 goto out_files;
1487 1487
1488 retval = -ERESTARTNOINTR; 1488 retval = prepare_bprm_creds(bprm);
1489 if (mutex_lock_interruptible(&current->cred_guard_mutex)) 1489 if (retval)
1490 goto out_free; 1490 goto out_free;
1491 current->in_execve = 1;
1492
1493 retval = -ENOMEM;
1494 bprm->cred = prepare_exec_creds();
1495 if (!bprm->cred)
1496 goto out_unlock;
1497 1491
1498 retval = check_unsafe_exec(bprm); 1492 retval = check_unsafe_exec(bprm);
1499 if (retval < 0) 1493 if (retval < 0)
1500 goto out_unlock; 1494 goto out_free;
1501 clear_in_exec = retval; 1495 clear_in_exec = retval;
1496 current->in_execve = 1;
1502 1497
1503 file = open_exec(filename); 1498 file = open_exec(filename);
1504 retval = PTR_ERR(file); 1499 retval = PTR_ERR(file);
@@ -1547,7 +1542,6 @@ int compat_do_execve(char * filename,
1547 /* execve succeeded */ 1542 /* execve succeeded */
1548 current->fs->in_exec = 0; 1543 current->fs->in_exec = 0;
1549 current->in_execve = 0; 1544 current->in_execve = 0;
1550 mutex_unlock(&current->cred_guard_mutex);
1551 acct_update_integrals(current); 1545 acct_update_integrals(current);
1552 free_bprm(bprm); 1546 free_bprm(bprm);
1553 if (displaced) 1547 if (displaced)
@@ -1567,10 +1561,7 @@ out_file:
1567out_unmark: 1561out_unmark:
1568 if (clear_in_exec) 1562 if (clear_in_exec)
1569 current->fs->in_exec = 0; 1563 current->fs->in_exec = 0;
1570
1571out_unlock:
1572 current->in_execve = 0; 1564 current->in_execve = 0;
1573 mutex_unlock(&current->cred_guard_mutex);
1574 1565
1575out_free: 1566out_free:
1576 free_bprm(bprm); 1567 free_bprm(bprm);
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4921e7426d95..a2f746066c5d 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -51,6 +51,7 @@ static const struct address_space_operations configfs_aops = {
51}; 51};
52 52
53static struct backing_dev_info configfs_backing_dev_info = { 53static struct backing_dev_info configfs_backing_dev_info = {
54 .name = "configfs",
54 .ra_pages = 0, /* No readahead */ 55 .ra_pages = 0, /* No readahead */
55 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 56 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
56}; 57};
diff --git a/fs/dcache.c b/fs/dcache.c
index 9e5cd3c3a6ba..a100fa35a48f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -32,6 +32,7 @@
32#include <linux/swap.h> 32#include <linux/swap.h>
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/fs_struct.h> 34#include <linux/fs_struct.h>
35#include <linux/hardirq.h>
35#include "internal.h" 36#include "internal.h"
36 37
37int sysctl_vfs_cache_pressure __read_mostly = 100; 38int sysctl_vfs_cache_pressure __read_mostly = 100;
diff --git a/fs/exec.c b/fs/exec.c
index fb4f3cdda78c..172ceb6edde4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1016,6 +1016,35 @@ out:
1016EXPORT_SYMBOL(flush_old_exec); 1016EXPORT_SYMBOL(flush_old_exec);
1017 1017
1018/* 1018/*
1019 * Prepare credentials and lock ->cred_guard_mutex.
1020 * install_exec_creds() commits the new creds and drops the lock.
1021 * Or, if exec fails before, free_bprm() should release ->cred and
1022 * and unlock.
1023 */
1024int prepare_bprm_creds(struct linux_binprm *bprm)
1025{
1026 if (mutex_lock_interruptible(&current->cred_guard_mutex))
1027 return -ERESTARTNOINTR;
1028
1029 bprm->cred = prepare_exec_creds();
1030 if (likely(bprm->cred))
1031 return 0;
1032
1033 mutex_unlock(&current->cred_guard_mutex);
1034 return -ENOMEM;
1035}
1036
1037void free_bprm(struct linux_binprm *bprm)
1038{
1039 free_arg_pages(bprm);
1040 if (bprm->cred) {
1041 mutex_unlock(&current->cred_guard_mutex);
1042 abort_creds(bprm->cred);
1043 }
1044 kfree(bprm);
1045}
1046
1047/*
1019 * install the new credentials for this executable 1048 * install the new credentials for this executable
1020 */ 1049 */
1021void install_exec_creds(struct linux_binprm *bprm) 1050void install_exec_creds(struct linux_binprm *bprm)
@@ -1024,12 +1053,13 @@ void install_exec_creds(struct linux_binprm *bprm)
1024 1053
1025 commit_creds(bprm->cred); 1054 commit_creds(bprm->cred);
1026 bprm->cred = NULL; 1055 bprm->cred = NULL;
1027 1056 /*
1028 /* cred_guard_mutex must be held at least to this point to prevent 1057 * cred_guard_mutex must be held at least to this point to prevent
1029 * ptrace_attach() from altering our determination of the task's 1058 * ptrace_attach() from altering our determination of the task's
1030 * credentials; any time after this it may be unlocked */ 1059 * credentials; any time after this it may be unlocked.
1031 1060 */
1032 security_bprm_committed_creds(bprm); 1061 security_bprm_committed_creds(bprm);
1062 mutex_unlock(&current->cred_guard_mutex);
1033} 1063}
1034EXPORT_SYMBOL(install_exec_creds); 1064EXPORT_SYMBOL(install_exec_creds);
1035 1065
@@ -1246,14 +1276,6 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1246 1276
1247EXPORT_SYMBOL(search_binary_handler); 1277EXPORT_SYMBOL(search_binary_handler);
1248 1278
1249void free_bprm(struct linux_binprm *bprm)
1250{
1251 free_arg_pages(bprm);
1252 if (bprm->cred)
1253 abort_creds(bprm->cred);
1254 kfree(bprm);
1255}
1256
1257/* 1279/*
1258 * sys_execve() executes a new program. 1280 * sys_execve() executes a new program.
1259 */ 1281 */
@@ -1277,20 +1299,15 @@ int do_execve(char * filename,
1277 if (!bprm) 1299 if (!bprm)
1278 goto out_files; 1300 goto out_files;
1279 1301
1280 retval = -ERESTARTNOINTR; 1302 retval = prepare_bprm_creds(bprm);
1281 if (mutex_lock_interruptible(&current->cred_guard_mutex)) 1303 if (retval)
1282 goto out_free; 1304 goto out_free;
1283 current->in_execve = 1;
1284
1285 retval = -ENOMEM;
1286 bprm->cred = prepare_exec_creds();
1287 if (!bprm->cred)
1288 goto out_unlock;
1289 1305
1290 retval = check_unsafe_exec(bprm); 1306 retval = check_unsafe_exec(bprm);
1291 if (retval < 0) 1307 if (retval < 0)
1292 goto out_unlock; 1308 goto out_free;
1293 clear_in_exec = retval; 1309 clear_in_exec = retval;
1310 current->in_execve = 1;
1294 1311
1295 file = open_exec(filename); 1312 file = open_exec(filename);
1296 retval = PTR_ERR(file); 1313 retval = PTR_ERR(file);
@@ -1340,7 +1357,6 @@ int do_execve(char * filename,
1340 /* execve succeeded */ 1357 /* execve succeeded */
1341 current->fs->in_exec = 0; 1358 current->fs->in_exec = 0;
1342 current->in_execve = 0; 1359 current->in_execve = 0;
1343 mutex_unlock(&current->cred_guard_mutex);
1344 acct_update_integrals(current); 1360 acct_update_integrals(current);
1345 free_bprm(bprm); 1361 free_bprm(bprm);
1346 if (displaced) 1362 if (displaced)
@@ -1360,10 +1376,7 @@ out_file:
1360out_unmark: 1376out_unmark:
1361 if (clear_in_exec) 1377 if (clear_in_exec)
1362 current->fs->in_exec = 0; 1378 current->fs->in_exec = 0;
1363
1364out_unlock:
1365 current->in_execve = 0; 1379 current->in_execve = 0;
1366 mutex_unlock(&current->cred_guard_mutex);
1367 1380
1368out_free: 1381out_free:
1369 free_bprm(bprm); 1382 free_bprm(bprm);
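Taken together, the exec.c hunks tie ownership of cred_guard_mutex to bprm->cred: prepare_bprm_creds() takes the mutex and allocates the credentials, install_exec_creds() commits them and drops the mutex on the success path, and free_bprm() drops it on any failure path where bprm->cred is still set. Below is a condensed sketch of the resulting do_execve() flow, assembled from the new-side lines above with the intermediate steps elided; it is not the complete function.

	retval = prepare_bprm_creds(bprm);	/* locks cred_guard_mutex and
						 * allocates bprm->cred */
	if (retval)
		goto out_free;			/* mutex is not held on failure */

	retval = check_unsafe_exec(bprm);
	if (retval < 0)
		goto out_free;			/* free_bprm() will unlock */
	clear_in_exec = retval;
	current->in_execve = 1;

	/* ... open_exec(), copy_strings(), search_binary_handler() ... */
	/*
	 * On success the binfmt handler calls install_exec_creds(), which
	 * commits bprm->cred, clears it and drops cred_guard_mutex; the
	 * success path then frees bprm and returns, and that free_bprm()
	 * only frees memory.
	 */

out_free:
	free_bprm(bprm);			/* bprm->cred still set here means
						 * abort_creds() + mutex_unlock() */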
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index d636e1297cad..a63d44256a70 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -230,7 +230,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
230 return error; 230 return error;
231} 231}
232 232
233static int 233int
234ext2_check_acl(struct inode *inode, int mask) 234ext2_check_acl(struct inode *inode, int mask)
235{ 235{
236 struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); 236 struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
@@ -246,12 +246,6 @@ ext2_check_acl(struct inode *inode, int mask)
246 return -EAGAIN; 246 return -EAGAIN;
247} 247}
248 248
249int
250ext2_permission(struct inode *inode, int mask)
251{
252 return generic_permission(inode, mask, ext2_check_acl);
253}
254
255/* 249/*
256 * Initialize the ACLs of a new inode. Called from ext2_new_inode. 250 * Initialize the ACLs of a new inode. Called from ext2_new_inode.
257 * 251 *
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index ecefe478898f..3ff6cbb9ac44 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,13 +54,13 @@ static inline int ext2_acl_count(size_t size)
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext2_permission (struct inode *, int); 57extern int ext2_check_acl (struct inode *, int);
58extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
59extern int ext2_init_acl (struct inode *, struct inode *); 59extern int ext2_init_acl (struct inode *, struct inode *);
60 60
61#else 61#else
62#include <linux/sched.h> 62#include <linux/sched.h>
63#define ext2_permission NULL 63#define ext2_check_acl NULL
64#define ext2_get_acl NULL 64#define ext2_get_acl NULL
65#define ext2_set_acl NULL 65#define ext2_set_acl NULL
66 66
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2b9e47dc9222..a2f3afd1a1c1 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -85,6 +85,6 @@ const struct inode_operations ext2_file_inode_operations = {
85 .removexattr = generic_removexattr, 85 .removexattr = generic_removexattr,
86#endif 86#endif
87 .setattr = ext2_setattr, 87 .setattr = ext2_setattr,
88 .permission = ext2_permission, 88 .check_acl = ext2_check_acl,
89 .fiemap = ext2_fiemap, 89 .fiemap = ext2_fiemap,
90}; 90};
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e1dedb0f7873..23701f289e98 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -362,6 +362,10 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
362 if (dir_de) { 362 if (dir_de) {
363 if (old_dir != new_dir) 363 if (old_dir != new_dir)
364 ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0); 364 ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
365 else {
366 kunmap(dir_page);
367 page_cache_release(dir_page);
368 }
365 inode_dec_link_count(old_dir); 369 inode_dec_link_count(old_dir);
366 } 370 }
367 return 0; 371 return 0;
@@ -396,7 +400,7 @@ const struct inode_operations ext2_dir_inode_operations = {
396 .removexattr = generic_removexattr, 400 .removexattr = generic_removexattr,
397#endif 401#endif
398 .setattr = ext2_setattr, 402 .setattr = ext2_setattr,
399 .permission = ext2_permission, 403 .check_acl = ext2_check_acl,
400}; 404};
401 405
402const struct inode_operations ext2_special_inode_operations = { 406const struct inode_operations ext2_special_inode_operations = {
@@ -407,5 +411,5 @@ const struct inode_operations ext2_special_inode_operations = {
407 .removexattr = generic_removexattr, 411 .removexattr = generic_removexattr,
408#endif 412#endif
409 .setattr = ext2_setattr, 413 .setattr = ext2_setattr,
410 .permission = ext2_permission, 414 .check_acl = ext2_check_acl,
411}; 415};
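ext2 no longer provides a ->permission method at all: the trivial wrapper around generic_permission() is deleted, and the filesystem instead exposes its ACL check through the new ->check_acl inode operation (the identical conversion is applied to ext3 and ext4 below). The other half of the change lives in the generic permission code; fs/namei.c is part of this series (110 lines in the diffstat) but its hunks are not shown here, so the dispatch below is a sketch of the expected shape rather than a quote.

/* assumed VFS-side dispatch that makes a bare ->check_acl sufficient */
static int inode_permission_sketch(struct inode *inode, int mask)
{
	if (inode->i_op->permission)
		return inode->i_op->permission(inode, mask);

	/*
	 * generic_permission() consults the ACL callback only when the
	 * owner/group/other mode bits alone would deny the access.
	 */
	return generic_permission(inode, mask, inode->i_op->check_acl);
}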
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index e167bae37ef0..c9b0df376b5f 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -238,7 +238,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
238 return error; 238 return error;
239} 239}
240 240
241static int 241int
242ext3_check_acl(struct inode *inode, int mask) 242ext3_check_acl(struct inode *inode, int mask)
243{ 243{
244 struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); 244 struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
@@ -254,12 +254,6 @@ ext3_check_acl(struct inode *inode, int mask)
254 return -EAGAIN; 254 return -EAGAIN;
255} 255}
256 256
257int
258ext3_permission(struct inode *inode, int mask)
259{
260 return generic_permission(inode, mask, ext3_check_acl);
261}
262
263/* 257/*
264 * Initialize the ACLs of a new inode. Called from ext3_new_inode. 258 * Initialize the ACLs of a new inode. Called from ext3_new_inode.
265 * 259 *
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 07d15a3a5969..597334626de9 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size)
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext3_permission (struct inode *, int); 57extern int ext3_check_acl (struct inode *, int);
58extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
60 60
61#else /* CONFIG_EXT3_FS_POSIX_ACL */ 61#else /* CONFIG_EXT3_FS_POSIX_ACL */
62#include <linux/sched.h> 62#include <linux/sched.h>
63#define ext3_permission NULL 63#define ext3_check_acl NULL
64 64
65static inline int 65static inline int
66ext3_acl_chmod(struct inode *inode) 66ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 5b49704b231b..299253214789 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -137,7 +137,7 @@ const struct inode_operations ext3_file_inode_operations = {
137 .listxattr = ext3_listxattr, 137 .listxattr = ext3_listxattr,
138 .removexattr = generic_removexattr, 138 .removexattr = generic_removexattr,
139#endif 139#endif
140 .permission = ext3_permission, 140 .check_acl = ext3_check_acl,
141 .fiemap = ext3_fiemap, 141 .fiemap = ext3_fiemap,
142}; 142};
143 143
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 6ff7b9730234..aad6400c9b77 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2445,7 +2445,7 @@ const struct inode_operations ext3_dir_inode_operations = {
2445 .listxattr = ext3_listxattr, 2445 .listxattr = ext3_listxattr,
2446 .removexattr = generic_removexattr, 2446 .removexattr = generic_removexattr,
2447#endif 2447#endif
2448 .permission = ext3_permission, 2448 .check_acl = ext3_check_acl,
2449}; 2449};
2450 2450
2451const struct inode_operations ext3_special_inode_operations = { 2451const struct inode_operations ext3_special_inode_operations = {
@@ -2456,5 +2456,5 @@ const struct inode_operations ext3_special_inode_operations = {
2456 .listxattr = ext3_listxattr, 2456 .listxattr = ext3_listxattr,
2457 .removexattr = generic_removexattr, 2457 .removexattr = generic_removexattr,
2458#endif 2458#endif
2459 .permission = ext3_permission, 2459 .check_acl = ext3_check_acl,
2460}; 2460};
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index f6d8967149ca..0df88b2a69b0 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -236,7 +236,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
236 return error; 236 return error;
237} 237}
238 238
239static int 239int
240ext4_check_acl(struct inode *inode, int mask) 240ext4_check_acl(struct inode *inode, int mask)
241{ 241{
242 struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); 242 struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS);
@@ -252,12 +252,6 @@ ext4_check_acl(struct inode *inode, int mask)
252 return -EAGAIN; 252 return -EAGAIN;
253} 253}
254 254
255int
256ext4_permission(struct inode *inode, int mask)
257{
258 return generic_permission(inode, mask, ext4_check_acl);
259}
260
261/* 255/*
262 * Initialize the ACLs of a new inode. Called from ext4_new_inode. 256 * Initialize the ACLs of a new inode. Called from ext4_new_inode.
263 * 257 *
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 949789d2bba6..9d843d5deac4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,13 +54,13 @@ static inline int ext4_acl_count(size_t size)
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext4_permission(struct inode *, int); 57extern int ext4_check_acl(struct inode *, int);
58extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); 59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
60 60
61#else /* CONFIG_EXT4_FS_POSIX_ACL */ 61#else /* CONFIG_EXT4_FS_POSIX_ACL */
62#include <linux/sched.h> 62#include <linux/sched.h>
63#define ext4_permission NULL 63#define ext4_check_acl NULL
64 64
65static inline int 65static inline int
66ext4_acl_chmod(struct inode *inode) 66ext4_acl_chmod(struct inode *inode)
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3f1873fef1c6..27f3c5354c0e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -207,7 +207,7 @@ const struct inode_operations ext4_file_inode_operations = {
207 .listxattr = ext4_listxattr, 207 .listxattr = ext4_listxattr,
208 .removexattr = generic_removexattr, 208 .removexattr = generic_removexattr,
209#endif 209#endif
210 .permission = ext4_permission, 210 .check_acl = ext4_check_acl,
211 .fallocate = ext4_fallocate, 211 .fallocate = ext4_fallocate,
212 .fiemap = ext4_fiemap, 212 .fiemap = ext4_fiemap,
213}; 213};
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index de04013d16ff..114abe5d2c1d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2536,7 +2536,7 @@ const struct inode_operations ext4_dir_inode_operations = {
2536 .listxattr = ext4_listxattr, 2536 .listxattr = ext4_listxattr,
2537 .removexattr = generic_removexattr, 2537 .removexattr = generic_removexattr,
2538#endif 2538#endif
2539 .permission = ext4_permission, 2539 .check_acl = ext4_check_acl,
2540 .fiemap = ext4_fiemap, 2540 .fiemap = ext4_fiemap,
2541}; 2541};
2542 2542
@@ -2548,5 +2548,5 @@ const struct inode_operations ext4_special_inode_operations = {
2548 .listxattr = ext4_listxattr, 2548 .listxattr = ext4_listxattr,
2549 .removexattr = generic_removexattr, 2549 .removexattr = generic_removexattr,
2550#endif 2550#endif
2551 .permission = ext4_permission, 2551 .check_acl = ext4_check_acl,
2552}; 2552};
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c54226be5294..da86ef58e427 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -19,171 +19,223 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
21#include <linux/mm.h> 21#include <linux/mm.h>
22#include <linux/kthread.h>
23#include <linux/freezer.h>
22#include <linux/writeback.h> 24#include <linux/writeback.h>
23#include <linux/blkdev.h> 25#include <linux/blkdev.h>
24#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
25#include <linux/buffer_head.h> 27#include <linux/buffer_head.h>
26#include "internal.h" 28#include "internal.h"
27 29
30#define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info)
28 31
29/** 32/*
30 * writeback_acquire - attempt to get exclusive writeback access to a device 33 * We don't actually have pdflush, but this one is exported though /proc...
31 * @bdi: the device's backing_dev_info structure
32 *
33 * It is a waste of resources to have more than one pdflush thread blocked on
34 * a single request queue. Exclusion at the request_queue level is obtained
35 * via a flag in the request_queue's backing_dev_info.state.
36 *
37 * Non-request_queue-backed address_spaces will share default_backing_dev_info,
38 * unless they implement their own. Which is somewhat inefficient, as this
39 * may prevent concurrent writeback against multiple devices.
40 */ 34 */
41static int writeback_acquire(struct backing_dev_info *bdi) 35int nr_pdflush_threads;
36
37/*
38 * Work items for the bdi_writeback threads
39 */
40struct bdi_work {
41 struct list_head list;
42 struct list_head wait_list;
43 struct rcu_head rcu_head;
44
45 unsigned long seen;
46 atomic_t pending;
47
48 struct super_block *sb;
49 unsigned long nr_pages;
50 enum writeback_sync_modes sync_mode;
51
52 unsigned long state;
53};
54
55enum {
56 WS_USED_B = 0,
57 WS_ONSTACK_B,
58};
59
60#define WS_USED (1 << WS_USED_B)
61#define WS_ONSTACK (1 << WS_ONSTACK_B)
62
63static inline bool bdi_work_on_stack(struct bdi_work *work)
42{ 64{
43 return !test_and_set_bit(BDI_pdflush, &bdi->state); 65 return test_bit(WS_ONSTACK_B, &work->state);
66}
67
68static inline void bdi_work_init(struct bdi_work *work,
69 struct writeback_control *wbc)
70{
71 INIT_RCU_HEAD(&work->rcu_head);
72 work->sb = wbc->sb;
73 work->nr_pages = wbc->nr_to_write;
74 work->sync_mode = wbc->sync_mode;
75 work->state = WS_USED;
76}
77
78static inline void bdi_work_init_on_stack(struct bdi_work *work,
79 struct writeback_control *wbc)
80{
81 bdi_work_init(work, wbc);
82 work->state |= WS_ONSTACK;
44} 83}
45 84
46/** 85/**
47 * writeback_in_progress - determine whether there is writeback in progress 86 * writeback_in_progress - determine whether there is writeback in progress
48 * @bdi: the device's backing_dev_info structure. 87 * @bdi: the device's backing_dev_info structure.
49 * 88 *
50 * Determine whether there is writeback in progress against a backing device. 89 * Determine whether there is writeback waiting to be handled against a
90 * backing device.
51 */ 91 */
52int writeback_in_progress(struct backing_dev_info *bdi) 92int writeback_in_progress(struct backing_dev_info *bdi)
53{ 93{
54 return test_bit(BDI_pdflush, &bdi->state); 94 return !list_empty(&bdi->work_list);
55} 95}
56 96
57/** 97static void bdi_work_clear(struct bdi_work *work)
58 * writeback_release - relinquish exclusive writeback access against a device.
59 * @bdi: the device's backing_dev_info structure
60 */
61static void writeback_release(struct backing_dev_info *bdi)
62{ 98{
63 BUG_ON(!writeback_in_progress(bdi)); 99 clear_bit(WS_USED_B, &work->state);
64 clear_bit(BDI_pdflush, &bdi->state); 100 smp_mb__after_clear_bit();
101 wake_up_bit(&work->state, WS_USED_B);
65} 102}
66 103
67static noinline void block_dump___mark_inode_dirty(struct inode *inode) 104static void bdi_work_free(struct rcu_head *head)
68{ 105{
69 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { 106 struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
70 struct dentry *dentry;
71 const char *name = "?";
72 107
73 dentry = d_find_alias(inode); 108 if (!bdi_work_on_stack(work))
74 if (dentry) { 109 kfree(work);
75 spin_lock(&dentry->d_lock); 110 else
76 name = (const char *) dentry->d_name.name; 111 bdi_work_clear(work);
77 }
78 printk(KERN_DEBUG
79 "%s(%d): dirtied inode %lu (%s) on %s\n",
80 current->comm, task_pid_nr(current), inode->i_ino,
81 name, inode->i_sb->s_id);
82 if (dentry) {
83 spin_unlock(&dentry->d_lock);
84 dput(dentry);
85 }
86 }
87} 112}
88 113
89/** 114static void wb_work_complete(struct bdi_work *work)
90 * __mark_inode_dirty - internal function
91 * @inode: inode to mark
92 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
93 * Mark an inode as dirty. Callers should use mark_inode_dirty or
94 * mark_inode_dirty_sync.
95 *
96 * Put the inode on the super block's dirty list.
97 *
98 * CAREFUL! We mark it dirty unconditionally, but move it onto the
99 * dirty list only if it is hashed or if it refers to a blockdev.
100 * If it was not hashed, it will never be added to the dirty list
101 * even if it is later hashed, as it will have been marked dirty already.
102 *
103 * In short, make sure you hash any inodes _before_ you start marking
104 * them dirty.
105 *
106 * This function *must* be atomic for the I_DIRTY_PAGES case -
107 * set_page_dirty() is called under spinlock in several places.
108 *
109 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
110 * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
111 * the kernel-internal blockdev inode represents the dirtying time of the
112 * blockdev's pages. This is why for I_DIRTY_PAGES we always use
113 * page->mapping->host, so the page-dirtying time is recorded in the internal
114 * blockdev inode.
115 */
116void __mark_inode_dirty(struct inode *inode, int flags)
117{ 115{
118 struct super_block *sb = inode->i_sb; 116 const enum writeback_sync_modes sync_mode = work->sync_mode;
119 117
120 /* 118 /*
121 * Don't do this for I_DIRTY_PAGES - that doesn't actually 119 * For allocated work, we can clear the done/seen bit right here.
122 * dirty the inode itself 120 * For on-stack work, we need to postpone both the clear and free
121 * to after the RCU grace period, since the stack could be invalidated
122 * as soon as bdi_work_clear() has done the wakeup.
123 */ 123 */
124 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 124 if (!bdi_work_on_stack(work))
125 if (sb->s_op->dirty_inode) 125 bdi_work_clear(work);
126 sb->s_op->dirty_inode(inode); 126 if (sync_mode == WB_SYNC_NONE || bdi_work_on_stack(work))
127 } 127 call_rcu(&work->rcu_head, bdi_work_free);
128}
128 129
130static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
131{
129 /* 132 /*
130 * make sure that changes are seen by all cpus before we test i_state 133 * The caller has retrieved the work arguments from this work,
131 * -- mikulas 134 * drop our reference. If this is the last ref, delete and free it
132 */ 135 */
133 smp_mb(); 136 if (atomic_dec_and_test(&work->pending)) {
137 struct backing_dev_info *bdi = wb->bdi;
134 138
135 /* avoid the locking if we can */ 139 spin_lock(&bdi->wb_lock);
136 if ((inode->i_state & flags) == flags) 140 list_del_rcu(&work->list);
137 return; 141 spin_unlock(&bdi->wb_lock);
138 142
139 if (unlikely(block_dump)) 143 wb_work_complete(work);
140 block_dump___mark_inode_dirty(inode); 144 }
141 145}
142 spin_lock(&inode_lock);
143 if ((inode->i_state & flags) != flags) {
144 const int was_dirty = inode->i_state & I_DIRTY;
145 146
146 inode->i_state |= flags; 147static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
148{
149 if (work) {
150 work->seen = bdi->wb_mask;
151 BUG_ON(!work->seen);
152 atomic_set(&work->pending, bdi->wb_cnt);
153 BUG_ON(!bdi->wb_cnt);
147 154
148 /* 155 /*
149 * If the inode is being synced, just update its dirty state. 156 * Make sure stores are seen before it appears on the list
150 * The unlocker will place the inode on the appropriate
151 * superblock list, based upon its state.
152 */ 157 */
153 if (inode->i_state & I_SYNC) 158 smp_mb();
154 goto out;
155 159
156 /* 160 spin_lock(&bdi->wb_lock);
157 * Only add valid (hashed) inodes to the superblock's 161 list_add_tail_rcu(&work->list, &bdi->work_list);
158 * dirty list. Add blockdev inodes as well. 162 spin_unlock(&bdi->wb_lock);
159 */ 163 }
160 if (!S_ISBLK(inode->i_mode)) { 164
161 if (hlist_unhashed(&inode->i_hash)) 165 /*
162 goto out; 166 * If the default thread isn't there, make sure we add it. When
163 } 167 * it gets created and wakes up, we'll run this work.
164 if (inode->i_state & (I_FREEING|I_CLEAR)) 168 */
165 goto out; 169 if (unlikely(list_empty_careful(&bdi->wb_list)))
170 wake_up_process(default_backing_dev_info.wb.task);
171 else {
172 struct bdi_writeback *wb = &bdi->wb;
166 173
167 /* 174 /*
168 * If the inode was already on s_dirty/s_io/s_more_io, don't 175 * If we failed allocating the bdi work item, wake up the wb
169 * reposition it (that would break s_dirty time-ordering). 176 * thread always. As a safety precaution, it'll flush out
177 * everything
170 */ 178 */
171 if (!was_dirty) { 179 if (!wb_has_dirty_io(wb)) {
172 inode->dirtied_when = jiffies; 180 if (work)
173 list_move(&inode->i_list, &sb->s_dirty); 181 wb_clear_pending(wb, work);
174 } 182 } else if (wb->task)
183 wake_up_process(wb->task);
175 } 184 }
176out:
177 spin_unlock(&inode_lock);
178} 185}
179 186
180EXPORT_SYMBOL(__mark_inode_dirty); 187/*
188 * Used for on-stack allocated work items. The caller needs to wait until
189 * the wb threads have acked the work before it's safe to continue.
190 */
191static void bdi_wait_on_work_clear(struct bdi_work *work)
192{
193 wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
194 TASK_UNINTERRUPTIBLE);
195}
181 196
182static int write_inode(struct inode *inode, int sync) 197static struct bdi_work *bdi_alloc_work(struct writeback_control *wbc)
183{ 198{
184 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) 199 struct bdi_work *work;
185 return inode->i_sb->s_op->write_inode(inode, sync); 200
186 return 0; 201 work = kmalloc(sizeof(*work), GFP_ATOMIC);
202 if (work)
203 bdi_work_init(work, wbc);
204
205 return work;
206}
207
208void bdi_start_writeback(struct writeback_control *wbc)
209{
210 const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
211 struct bdi_work work_stack, *work = NULL;
212
213 if (!must_wait)
214 work = bdi_alloc_work(wbc);
215
216 if (!work) {
217 work = &work_stack;
218 bdi_work_init_on_stack(work, wbc);
219 }
220
221 bdi_queue_work(wbc->bdi, work);
222
223 /*
224 * If the sync mode is WB_SYNC_ALL, block waiting for the work to
225 * complete. If not, we only need to wait for the work to be started,
226 * if we allocated it on-stack. We use the same mechanism, if the
227 * wait bit is set in the bdi_work struct, then threads will not
228 * clear pending until after they are done.
229 *
230 * Note that work == &work_stack if must_wait is true, so we don't
231 * need to do call_rcu() here ever, since the completion path will
232 * have done that for us.
233 */
234 if (must_wait || work == &work_stack) {
235 bdi_wait_on_work_clear(work);
236 if (work != &work_stack)
237 call_rcu(&work->rcu_head, bdi_work_free);
238 }
187} 239}
188 240
189/* 241/*
@@ -191,31 +243,32 @@ static int write_inode(struct inode *inode, int sync)
191 * furthest end of its superblock's dirty-inode list. 243 * furthest end of its superblock's dirty-inode list.
192 * 244 *
193 * Before stamping the inode's ->dirtied_when, we check to see whether it is 245 * Before stamping the inode's ->dirtied_when, we check to see whether it is
194 * already the most-recently-dirtied inode on the s_dirty list. If that is 246 * already the most-recently-dirtied inode on the b_dirty list. If that is
195 * the case then the inode must have been redirtied while it was being written 247 * the case then the inode must have been redirtied while it was being written
196 * out and we don't reset its dirtied_when. 248 * out and we don't reset its dirtied_when.
197 */ 249 */
198static void redirty_tail(struct inode *inode) 250static void redirty_tail(struct inode *inode)
199{ 251{
200 struct super_block *sb = inode->i_sb; 252 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
201 253
202 if (!list_empty(&sb->s_dirty)) { 254 if (!list_empty(&wb->b_dirty)) {
203 struct inode *tail_inode; 255 struct inode *tail;
204 256
205 tail_inode = list_entry(sb->s_dirty.next, struct inode, i_list); 257 tail = list_entry(wb->b_dirty.next, struct inode, i_list);
206 if (time_before(inode->dirtied_when, 258 if (time_before(inode->dirtied_when, tail->dirtied_when))
207 tail_inode->dirtied_when))
208 inode->dirtied_when = jiffies; 259 inode->dirtied_when = jiffies;
209 } 260 }
210 list_move(&inode->i_list, &sb->s_dirty); 261 list_move(&inode->i_list, &wb->b_dirty);
211} 262}
212 263
213/* 264/*
214 * requeue inode for re-scanning after sb->s_io list is exhausted. 265 * requeue inode for re-scanning after bdi->b_io list is exhausted.
215 */ 266 */
216static void requeue_io(struct inode *inode) 267static void requeue_io(struct inode *inode)
217{ 268{
218 list_move(&inode->i_list, &inode->i_sb->s_more_io); 269 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
270
271 list_move(&inode->i_list, &wb->b_more_io);
219} 272}
220 273
221static void inode_sync_complete(struct inode *inode) 274static void inode_sync_complete(struct inode *inode)
@@ -262,20 +315,18 @@ static void move_expired_inodes(struct list_head *delaying_queue,
262/* 315/*
263 * Queue all expired dirty inodes for io, eldest first. 316 * Queue all expired dirty inodes for io, eldest first.
264 */ 317 */
265static void queue_io(struct super_block *sb, 318static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this)
266 unsigned long *older_than_this)
267{ 319{
268 list_splice_init(&sb->s_more_io, sb->s_io.prev); 320 list_splice_init(&wb->b_more_io, wb->b_io.prev);
269 move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this); 321 move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this);
270} 322}
271 323
272int sb_has_dirty_inodes(struct super_block *sb) 324static int write_inode(struct inode *inode, int sync)
273{ 325{
274 return !list_empty(&sb->s_dirty) || 326 if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
275 !list_empty(&sb->s_io) || 327 return inode->i_sb->s_op->write_inode(inode, sync);
276 !list_empty(&sb->s_more_io); 328 return 0;
277} 329}
278EXPORT_SYMBOL(sb_has_dirty_inodes);
279 330
280/* 331/*
281 * Wait for writeback on an inode to complete. 332 * Wait for writeback on an inode to complete.
@@ -322,11 +373,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
322 if (inode->i_state & I_SYNC) { 373 if (inode->i_state & I_SYNC) {
323 /* 374 /*
324 * If this inode is locked for writeback and we are not doing 375 * If this inode is locked for writeback and we are not doing
325 * writeback-for-data-integrity, move it to s_more_io so that 376 * writeback-for-data-integrity, move it to b_more_io so that
326 * writeback can proceed with the other inodes on s_io. 377 * writeback can proceed with the other inodes on s_io.
327 * 378 *
328 * We'll have another go at writing back this inode when we 379 * We'll have another go at writing back this inode when we
329 * completed a full scan of s_io. 380 * completed a full scan of b_io.
330 */ 381 */
331 if (!wait) { 382 if (!wait) {
332 requeue_io(inode); 383 requeue_io(inode);
@@ -371,11 +422,11 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
371 /* 422 /*
372 * We didn't write back all the pages. nfs_writepages() 423 * We didn't write back all the pages. nfs_writepages()
373 * sometimes bales out without doing anything. Redirty 424 * sometimes bales out without doing anything. Redirty
374 * the inode; Move it from s_io onto s_more_io/s_dirty. 425 * the inode; Move it from b_io onto b_more_io/b_dirty.
375 */ 426 */
376 /* 427 /*
377 * akpm: if the caller was the kupdate function we put 428 * akpm: if the caller was the kupdate function we put
378 * this inode at the head of s_dirty so it gets first 429 * this inode at the head of b_dirty so it gets first
379 * consideration. Otherwise, move it to the tail, for 430 * consideration. Otherwise, move it to the tail, for
380 * the reasons described there. I'm not really sure 431 * the reasons described there. I'm not really sure
381 * how much sense this makes. Presumably I had a good 432 * how much sense this makes. Presumably I had a good
@@ -385,7 +436,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
385 if (wbc->for_kupdate) { 436 if (wbc->for_kupdate) {
386 /* 437 /*
387 * For the kupdate function we move the inode 438 * For the kupdate function we move the inode
388 * to s_more_io so it will get more writeout as 439 * to b_more_io so it will get more writeout as
389 * soon as the queue becomes uncongested. 440 * soon as the queue becomes uncongested.
390 */ 441 */
391 inode->i_state |= I_DIRTY_PAGES; 442 inode->i_state |= I_DIRTY_PAGES;
@@ -434,50 +485,84 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
434} 485}
435 486
436/* 487/*
437 * Write out a superblock's list of dirty inodes. A wait will be performed 488 * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
438 * upon no inodes, all inodes or the final one, depending upon sync_mode. 489 * before calling writeback. So make sure that we do pin it, so it doesn't
439 * 490 * go away while we are writing inodes from it.
440 * If older_than_this is non-NULL, then only write out inodes which
441 * had their first dirtying at a time earlier than *older_than_this.
442 *
443 * If we're a pdflush thread, then implement pdflush collision avoidance
444 * against the entire list.
445 * 491 *
446 * If `bdi' is non-zero then we're being asked to writeback a specific queue. 492 * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
447 * This function assumes that the blockdev superblock's inodes are backed by 493 * 1 if we failed.
448 * a variety of queues, so all inodes are searched. For other superblocks,
449 * assume that all inodes are backed by the same queue.
450 *
451 * FIXME: this linear search could get expensive with many fileystems. But
452 * how to fix? We need to go from an address_space to all inodes which share
453 * a queue with that address_space. (Easy: have a global "dirty superblocks"
454 * list).
455 *
456 * The inodes to be written are parked on sb->s_io. They are moved back onto
457 * sb->s_dirty as they are selected for writing. This way, none can be missed
458 * on the writer throttling path, and we get decent balancing between many
459 * throttled threads: we don't want them all piling up on inode_sync_wait.
460 */ 494 */
461void generic_sync_sb_inodes(struct super_block *sb, 495static int pin_sb_for_writeback(struct writeback_control *wbc,
496 struct inode *inode)
497{
498 struct super_block *sb = inode->i_sb;
499
500 /*
501 * Caller must already hold the ref for this
502 */
503 if (wbc->sync_mode == WB_SYNC_ALL) {
504 WARN_ON(!rwsem_is_locked(&sb->s_umount));
505 return 0;
506 }
507
508 spin_lock(&sb_lock);
509 sb->s_count++;
510 if (down_read_trylock(&sb->s_umount)) {
511 if (sb->s_root) {
512 spin_unlock(&sb_lock);
513 return 0;
514 }
515 /*
516 * umounted, drop rwsem again and fall through to failure
517 */
518 up_read(&sb->s_umount);
519 }
520
521 sb->s_count--;
522 spin_unlock(&sb_lock);
523 return 1;
524}
525
526static void unpin_sb_for_writeback(struct writeback_control *wbc,
527 struct inode *inode)
528{
529 struct super_block *sb = inode->i_sb;
530
531 if (wbc->sync_mode == WB_SYNC_ALL)
532 return;
533
534 up_read(&sb->s_umount);
535 put_super(sb);
536}
537
538static void writeback_inodes_wb(struct bdi_writeback *wb,
462 struct writeback_control *wbc) 539 struct writeback_control *wbc)
463{ 540{
541 struct super_block *sb = wbc->sb;
542 const int is_blkdev_sb = sb_is_blkdev_sb(sb);
464 const unsigned long start = jiffies; /* livelock avoidance */ 543 const unsigned long start = jiffies; /* livelock avoidance */
465 int sync = wbc->sync_mode == WB_SYNC_ALL;
466 544
467 spin_lock(&inode_lock); 545 spin_lock(&inode_lock);
468 if (!wbc->for_kupdate || list_empty(&sb->s_io))
469 queue_io(sb, wbc->older_than_this);
470 546
471 while (!list_empty(&sb->s_io)) { 547 if (!wbc->for_kupdate || list_empty(&wb->b_io))
472 struct inode *inode = list_entry(sb->s_io.prev, 548 queue_io(wb, wbc->older_than_this);
549
550 while (!list_empty(&wb->b_io)) {
551 struct inode *inode = list_entry(wb->b_io.prev,
473 struct inode, i_list); 552 struct inode, i_list);
474 struct address_space *mapping = inode->i_mapping;
475 struct backing_dev_info *bdi = mapping->backing_dev_info;
476 long pages_skipped; 553 long pages_skipped;
477 554
478 if (!bdi_cap_writeback_dirty(bdi)) { 555 /*
556 * super block given and doesn't match, skip this inode
557 */
558 if (sb && sb != inode->i_sb) {
559 redirty_tail(inode);
560 continue;
561 }
562
563 if (!bdi_cap_writeback_dirty(wb->bdi)) {
479 redirty_tail(inode); 564 redirty_tail(inode);
480 if (sb_is_blkdev_sb(sb)) { 565 if (is_blkdev_sb) {
481 /* 566 /*
482 * Dirty memory-backed blockdev: the ramdisk 567 * Dirty memory-backed blockdev: the ramdisk
483 * driver does this. Skip just this inode 568 * driver does this. Skip just this inode
@@ -497,21 +582,14 @@ void generic_sync_sb_inodes(struct super_block *sb,
497 continue; 582 continue;
498 } 583 }
499 584
500 if (wbc->nonblocking && bdi_write_congested(bdi)) { 585 if (wbc->nonblocking && bdi_write_congested(wb->bdi)) {
501 wbc->encountered_congestion = 1; 586 wbc->encountered_congestion = 1;
502 if (!sb_is_blkdev_sb(sb)) 587 if (!is_blkdev_sb)
503 break; /* Skip a congested fs */ 588 break; /* Skip a congested fs */
504 requeue_io(inode); 589 requeue_io(inode);
505 continue; /* Skip a congested blockdev */ 590 continue; /* Skip a congested blockdev */
506 } 591 }
507 592
508 if (wbc->bdi && bdi != wbc->bdi) {
509 if (!sb_is_blkdev_sb(sb))
510 break; /* fs has the wrong queue */
511 requeue_io(inode);
512 continue; /* blockdev has wrong queue */
513 }
514
515 /* 593 /*
516 * Was this inode dirtied after sync_sb_inodes was called? 594 * Was this inode dirtied after sync_sb_inodes was called?
517 * This keeps sync from extra jobs and livelock. 595 * This keeps sync from extra jobs and livelock.
@@ -519,16 +597,16 @@ void generic_sync_sb_inodes(struct super_block *sb,
519 if (inode_dirtied_after(inode, start)) 597 if (inode_dirtied_after(inode, start))
520 break; 598 break;
521 599
522 /* Is another pdflush already flushing this queue? */ 600 if (pin_sb_for_writeback(wbc, inode)) {
523 if (current_is_pdflush() && !writeback_acquire(bdi)) 601 requeue_io(inode);
524 break; 602 continue;
603 }
525 604
526 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR)); 605 BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
527 __iget(inode); 606 __iget(inode);
528 pages_skipped = wbc->pages_skipped; 607 pages_skipped = wbc->pages_skipped;
529 writeback_single_inode(inode, wbc); 608 writeback_single_inode(inode, wbc);
530 if (current_is_pdflush()) 609 unpin_sb_for_writeback(wbc, inode);
531 writeback_release(bdi);
532 if (wbc->pages_skipped != pages_skipped) { 610 if (wbc->pages_skipped != pages_skipped) {
533 /* 611 /*
534 * writeback is not making progress due to locked 612 * writeback is not making progress due to locked
@@ -544,144 +622,571 @@ void generic_sync_sb_inodes(struct super_block *sb,
544 wbc->more_io = 1; 622 wbc->more_io = 1;
545 break; 623 break;
546 } 624 }
547 if (!list_empty(&sb->s_more_io)) 625 if (!list_empty(&wb->b_more_io))
548 wbc->more_io = 1; 626 wbc->more_io = 1;
549 } 627 }
550 628
551 if (sync) { 629 spin_unlock(&inode_lock);
552 struct inode *inode, *old_inode = NULL; 630 /* Leave any unwritten inodes on b_io */
631}
632
633void writeback_inodes_wbc(struct writeback_control *wbc)
634{
635 struct backing_dev_info *bdi = wbc->bdi;
553 636
637 writeback_inodes_wb(&bdi->wb, wbc);
638}
639
640/*
641 * The maximum number of pages to writeout in a single bdi flush/kupdate
642 * operation. We do this so we don't hold I_SYNC against an inode for
643 * enormous amounts of time, which would block a userspace task which has
644 * been forced to throttle against that inode. Also, the code reevaluates
645 * the dirty each time it has written this many pages.
646 */
647#define MAX_WRITEBACK_PAGES 1024
648
649static inline bool over_bground_thresh(void)
650{
651 unsigned long background_thresh, dirty_thresh;
652
653 get_dirty_limits(&background_thresh, &dirty_thresh, NULL, NULL);
654
655 return (global_page_state(NR_FILE_DIRTY) +
656 global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
657}
658
659/*
660 * Explicit flushing or periodic writeback of "old" data.
661 *
662 * Define "old": the first time one of an inode's pages is dirtied, we mark the
663 * dirtying-time in the inode's address_space. So this periodic writeback code
664 * just walks the superblock inode list, writing back any inodes which are
665 * older than a specific point in time.
666 *
667 * Try to run once per dirty_writeback_interval. But if a writeback event
668 * takes longer than a dirty_writeback_interval interval, then leave a
669 * one-second gap.
670 *
671 * older_than_this takes precedence over nr_to_write. So we'll only write back
672 * all dirty pages if they are all attached to "old" mappings.
673 */
674static long wb_writeback(struct bdi_writeback *wb, long nr_pages,
675 struct super_block *sb,
676 enum writeback_sync_modes sync_mode, int for_kupdate)
677{
678 struct writeback_control wbc = {
679 .bdi = wb->bdi,
680 .sb = sb,
681 .sync_mode = sync_mode,
682 .older_than_this = NULL,
683 .for_kupdate = for_kupdate,
684 .range_cyclic = 1,
685 };
686 unsigned long oldest_jif;
687 long wrote = 0;
688
689 if (wbc.for_kupdate) {
690 wbc.older_than_this = &oldest_jif;
691 oldest_jif = jiffies -
692 msecs_to_jiffies(dirty_expire_interval * 10);
693 }
694
695 for (;;) {
554 /* 696 /*
555 * Data integrity sync. Must wait for all pages under writeback, 697 * Don't flush anything for non-integrity writeback where
556 * because there may have been pages dirtied before our sync 698 * no nr_pages was given
557 * call, but which had writeout started before we write it out.
558 * In which case, the inode may not be on the dirty list, but
559 * we still have to wait for that writeout.
560 */ 699 */
561 list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { 700 if (!for_kupdate && nr_pages <= 0 && sync_mode == WB_SYNC_NONE)
562 struct address_space *mapping; 701 break;
563 702
564 if (inode->i_state & 703 /*
565 (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) 704 * If no specific pages were given and this is just a
566 continue; 705 * periodic background writeout and we are below the
567 mapping = inode->i_mapping; 706 * background dirty threshold, don't do anything
568 if (mapping->nrpages == 0) 707 */
708 if (for_kupdate && nr_pages <= 0 && !over_bground_thresh())
709 break;
710
711 wbc.more_io = 0;
712 wbc.encountered_congestion = 0;
713 wbc.nr_to_write = MAX_WRITEBACK_PAGES;
714 wbc.pages_skipped = 0;
715 writeback_inodes_wb(wb, &wbc);
716 nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
717 wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
718
719 /*
720 * If we ran out of stuff to write, bail unless more_io got set
721 */
722 if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
723 if (wbc.more_io && !wbc.for_kupdate)
569 continue; 724 continue;
570 __iget(inode); 725 break;
571 spin_unlock(&inode_lock); 726 }
727 }
728
729 return wrote;
730}
731
732/*
733 * Return the next bdi_work struct that hasn't been processed by this
734 * wb thread yet
735 */
736static struct bdi_work *get_next_work_item(struct backing_dev_info *bdi,
737 struct bdi_writeback *wb)
738{
739 struct bdi_work *work, *ret = NULL;
740
741 rcu_read_lock();
742
743 list_for_each_entry_rcu(work, &bdi->work_list, list) {
744 if (!test_and_clear_bit(wb->nr, &work->seen))
745 continue;
746
747 ret = work;
748 break;
749 }
750
751 rcu_read_unlock();
752 return ret;
753}
754
755static long wb_check_old_data_flush(struct bdi_writeback *wb)
756{
757 unsigned long expired;
758 long nr_pages;
759
760 expired = wb->last_old_flush +
761 msecs_to_jiffies(dirty_writeback_interval * 10);
762 if (time_before(jiffies, expired))
763 return 0;
764
765 wb->last_old_flush = jiffies;
766 nr_pages = global_page_state(NR_FILE_DIRTY) +
767 global_page_state(NR_UNSTABLE_NFS) +
768 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
769
770 if (nr_pages)
771 return wb_writeback(wb, nr_pages, NULL, WB_SYNC_NONE, 1);
772
773 return 0;
774}
775
776/*
777 * Retrieve work items and do the writeback they describe
778 */
779long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
780{
781 struct backing_dev_info *bdi = wb->bdi;
782 struct bdi_work *work;
783 long nr_pages, wrote = 0;
784
785 while ((work = get_next_work_item(bdi, wb)) != NULL) {
786 enum writeback_sync_modes sync_mode;
787
788 nr_pages = work->nr_pages;
789
790 /*
791 * Override sync mode, in case we must wait for completion
792 */
793 if (force_wait)
794 work->sync_mode = sync_mode = WB_SYNC_ALL;
795 else
796 sync_mode = work->sync_mode;
797
798 /*
799 * If this isn't a data integrity operation, just notify
800 * that we have seen this work and we are now starting it.
801 */
802 if (sync_mode == WB_SYNC_NONE)
803 wb_clear_pending(wb, work);
804
805 wrote += wb_writeback(wb, nr_pages, work->sb, sync_mode, 0);
806
807 /*
808 * This is a data integrity writeback, so only do the
809 * notification when we have completed the work.
810 */
811 if (sync_mode == WB_SYNC_ALL)
812 wb_clear_pending(wb, work);
813 }
814
815 /*
816 * Check for periodic writeback, kupdated() style
817 */
818 wrote += wb_check_old_data_flush(wb);
819
820 return wrote;
821}
822
823/*
824 * Handle writeback of dirty data for the device backed by this bdi. Also
825 * wakes up periodically and does kupdated style flushing.
826 */
827int bdi_writeback_task(struct bdi_writeback *wb)
828{
829 unsigned long last_active = jiffies;
830 unsigned long wait_jiffies = -1UL;
831 long pages_written;
832
833 while (!kthread_should_stop()) {
834 pages_written = wb_do_writeback(wb, 0);
835
836 if (pages_written)
837 last_active = jiffies;
838 else if (wait_jiffies != -1UL) {
839 unsigned long max_idle;
840
572 /* 841 /*
573 * We hold a reference to 'inode' so it couldn't have 842 * Longest period of inactivity that we tolerate. If we
574 * been removed from s_inodes list while we dropped the 843 * see dirty data again later, the task will get
575 * inode_lock. We cannot iput the inode now as we can 844 * recreated automatically.
576 * be holding the last reference and we cannot iput it
577 * under inode_lock. So we keep the reference and iput
578 * it later.
579 */ 845 */
580 iput(old_inode); 846 max_idle = max(5UL * 60 * HZ, wait_jiffies);
581 old_inode = inode; 847 if (time_after(jiffies, max_idle + last_active))
848 break;
849 }
582 850
583 filemap_fdatawait(mapping); 851 wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
852 set_current_state(TASK_INTERRUPTIBLE);
853 schedule_timeout(wait_jiffies);
854 try_to_freeze();
855 }
584 856
585 cond_resched(); 857 return 0;
858}
859
860/*
861 * Schedule writeback for all backing devices. Expensive! If this is a data
862 * integrity operation, writeback will be complete when this returns. If
863 * we are simply called for WB_SYNC_NONE, then writeback will merely be
864 * scheduled to run.
865 */
866static void bdi_writeback_all(struct writeback_control *wbc)
867{
868 const bool must_wait = wbc->sync_mode == WB_SYNC_ALL;
869 struct backing_dev_info *bdi;
870 struct bdi_work *work;
871 LIST_HEAD(list);
872
873restart:
874 spin_lock(&bdi_lock);
875
876 list_for_each_entry(bdi, &bdi_list, bdi_list) {
877 struct bdi_work *work;
586 878
587 spin_lock(&inode_lock); 879 if (!bdi_has_dirty_io(bdi))
880 continue;
881
882 /*
883 * If work allocation fails, do the writes inline. We drop
884 * the lock and restart the list writeout. This should be OK,
885 * since this happens rarely and because the writeout should
886 * eventually make more free memory available.
887 */
888 work = bdi_alloc_work(wbc);
889 if (!work) {
890 struct writeback_control __wbc;
891
892 /*
893 * Not a data integrity writeout, just continue
894 */
895 if (!must_wait)
896 continue;
897
898 spin_unlock(&bdi_lock);
899 __wbc = *wbc;
900 __wbc.bdi = bdi;
901 writeback_inodes_wbc(&__wbc);
902 goto restart;
588 } 903 }
589 spin_unlock(&inode_lock); 904 if (must_wait)
590 iput(old_inode); 905 list_add_tail(&work->wait_list, &list);
591 } else 906
592 spin_unlock(&inode_lock); 907 bdi_queue_work(bdi, work);
908 }
909
910 spin_unlock(&bdi_lock);
593 911
594 return; /* Leave any unwritten inodes on s_io */ 912 /*
913 * If this is for WB_SYNC_ALL, wait for pending work to complete
914 * before returning.
915 */
916 while (!list_empty(&list)) {
917 work = list_entry(list.next, struct bdi_work, wait_list);
918 list_del(&work->wait_list);
919 bdi_wait_on_work_clear(work);
920 call_rcu(&work->rcu_head, bdi_work_free);
921 }
595} 922}
596EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
597 923
598static void sync_sb_inodes(struct super_block *sb, 924/*
599 struct writeback_control *wbc) 925 * Start writeback of `nr_pages' pages. If `nr_pages' is zero, write back
926 * the whole world.
927 */
928void wakeup_flusher_threads(long nr_pages)
600{ 929{
601 generic_sync_sb_inodes(sb, wbc); 930 struct writeback_control wbc = {
931 .sync_mode = WB_SYNC_NONE,
932 .older_than_this = NULL,
933 .range_cyclic = 1,
934 };
935
936 if (nr_pages == 0)
937 nr_pages = global_page_state(NR_FILE_DIRTY) +
938 global_page_state(NR_UNSTABLE_NFS);
939 wbc.nr_to_write = nr_pages;
940 bdi_writeback_all(&wbc);
602} 941}
603 942
604/* 943static noinline void block_dump___mark_inode_dirty(struct inode *inode)
605 * Start writeback of dirty pagecache data against all unlocked inodes. 944{
945 if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
946 struct dentry *dentry;
947 const char *name = "?";
948
949 dentry = d_find_alias(inode);
950 if (dentry) {
951 spin_lock(&dentry->d_lock);
952 name = (const char *) dentry->d_name.name;
953 }
954 printk(KERN_DEBUG
955 "%s(%d): dirtied inode %lu (%s) on %s\n",
956 current->comm, task_pid_nr(current), inode->i_ino,
957 name, inode->i_sb->s_id);
958 if (dentry) {
959 spin_unlock(&dentry->d_lock);
960 dput(dentry);
961 }
962 }
963}
964
965/**
966 * __mark_inode_dirty - internal function
967 * @inode: inode to mark
968 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
969 * Mark an inode as dirty. Callers should use mark_inode_dirty or
970 * mark_inode_dirty_sync.
606 * 971 *
607 * Note: 972 * Put the inode on the super block's dirty list.
608 * We don't need to grab a reference to superblock here. If it has non-empty 973 *
609 * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed 974 * CAREFUL! We mark it dirty unconditionally, but move it onto the
610 * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all 975 * dirty list only if it is hashed or if it refers to a blockdev.
611 * empty. Since __sync_single_inode() regains inode_lock before it finally moves 976 * If it was not hashed, it will never be added to the dirty list
612 * inode from superblock lists we are OK. 977 * even if it is later hashed, as it will have been marked dirty already.
613 * 978 *
614 * If `older_than_this' is non-zero then only flush inodes which have a 979 * In short, make sure you hash any inodes _before_ you start marking
615 * flushtime older than *older_than_this. 980 * them dirty.
616 * 981 *
617 * If `bdi' is non-zero then we will scan the first inode against each 982 * This function *must* be atomic for the I_DIRTY_PAGES case -
618 * superblock until we find the matching ones. One group will be the dirty 983 * set_page_dirty() is called under spinlock in several places.
619 * inodes against a filesystem. Then when we hit the dummy blockdev superblock, 984 *
620 * sync_sb_inodes will seekout the blockdev which matches `bdi'. Maybe not 985 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
621 * super-efficient but we're about to do a ton of I/O... 986 * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
987 * the kernel-internal blockdev inode represents the dirtying time of the
988 * blockdev's pages. This is why for I_DIRTY_PAGES we always use
989 * page->mapping->host, so the page-dirtying time is recorded in the internal
990 * blockdev inode.
622 */ 991 */
623void 992void __mark_inode_dirty(struct inode *inode, int flags)
624writeback_inodes(struct writeback_control *wbc)
625{ 993{
626 struct super_block *sb; 994 struct super_block *sb = inode->i_sb;
627 995
628 might_sleep(); 996 /*
629 spin_lock(&sb_lock); 997 * Don't do this for I_DIRTY_PAGES - that doesn't actually
630restart: 998 * dirty the inode itself
631 list_for_each_entry_reverse(sb, &super_blocks, s_list) { 999 */
632 if (sb_has_dirty_inodes(sb)) { 1000 if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
633 /* we're making our own get_super here */ 1001 if (sb->s_op->dirty_inode)
634 sb->s_count++; 1002 sb->s_op->dirty_inode(inode);
635 spin_unlock(&sb_lock); 1003 }
636 /* 1004
637 * If we can't get the readlock, there's no sense in 1005 /*
638 * waiting around, most of the time the FS is going to 1006 * make sure that changes are seen by all cpus before we test i_state
639 * be unmounted by the time it is released. 1007 * -- mikulas
640 */ 1008 */
641 if (down_read_trylock(&sb->s_umount)) { 1009 smp_mb();
642 if (sb->s_root) 1010
643 sync_sb_inodes(sb, wbc); 1011 /* avoid the locking if we can */
644 up_read(&sb->s_umount); 1012 if ((inode->i_state & flags) == flags)
1013 return;
1014
1015 if (unlikely(block_dump))
1016 block_dump___mark_inode_dirty(inode);
1017
1018 spin_lock(&inode_lock);
1019 if ((inode->i_state & flags) != flags) {
1020 const int was_dirty = inode->i_state & I_DIRTY;
1021
1022 inode->i_state |= flags;
1023
1024 /*
1025 * If the inode is being synced, just update its dirty state.
1026 * The unlocker will place the inode on the appropriate
1027 * superblock list, based upon its state.
1028 */
1029 if (inode->i_state & I_SYNC)
1030 goto out;
1031
1032 /*
1033 * Only add valid (hashed) inodes to the superblock's
1034 * dirty list. Add blockdev inodes as well.
1035 */
1036 if (!S_ISBLK(inode->i_mode)) {
1037 if (hlist_unhashed(&inode->i_hash))
1038 goto out;
1039 }
1040 if (inode->i_state & (I_FREEING|I_CLEAR))
1041 goto out;
1042
1043 /*
1044 * If the inode was already on b_dirty/b_io/b_more_io, don't
1045 * reposition it (that would break b_dirty time-ordering).
1046 */
1047 if (!was_dirty) {
1048 struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
1049 struct backing_dev_info *bdi = wb->bdi;
1050
1051 if (bdi_cap_writeback_dirty(bdi) &&
1052 !test_bit(BDI_registered, &bdi->state)) {
1053 WARN_ON(1);
1054 printk(KERN_ERR "bdi-%s not registered\n",
1055 bdi->name);
645 } 1056 }
646 spin_lock(&sb_lock); 1057
647 if (__put_super_and_need_restart(sb)) 1058 inode->dirtied_when = jiffies;
648 goto restart; 1059 list_move(&inode->i_list, &wb->b_dirty);
649 } 1060 }
650 if (wbc->nr_to_write <= 0)
651 break;
652 } 1061 }
653 spin_unlock(&sb_lock); 1062out:
1063 spin_unlock(&inode_lock);
654} 1064}
1065EXPORT_SYMBOL(__mark_inode_dirty);
655 1066
656/* 1067/*
657 * writeback and wait upon the filesystem's dirty inodes. The caller will 1068 * Write out a superblock's list of dirty inodes. A wait will be performed
658 * do this in two passes - one to write, and one to wait. 1069 * upon no inodes, all inodes or the final one, depending upon sync_mode.
1070 *
1071 * If older_than_this is non-NULL, then only write out inodes which
1072 * had their first dirtying at a time earlier than *older_than_this.
659 * 1073 *
660 * A finite limit is set on the number of pages which will be written. 1074 * If we're a pdflush thread, then implement pdflush collision avoidance
661 * To prevent infinite livelock of sys_sync(). 1075 * against the entire list.
662 * 1076 *
663 * We add in the number of potentially dirty inodes, because each inode write 1077 * If `bdi' is non-zero then we're being asked to writeback a specific queue.
664 * can dirty pagecache in the underlying blockdev. 1078 * This function assumes that the blockdev superblock's inodes are backed by
1079 * a variety of queues, so all inodes are searched. For other superblocks,
1080 * assume that all inodes are backed by the same queue.
1081 *
1082 * The inodes to be written are parked on bdi->b_io. They are moved back onto
1083 * bdi->b_dirty as they are selected for writing. This way, none can be missed
1084 * on the writer throttling path, and we get decent balancing between many
1085 * throttled threads: we don't want them all piling up on inode_sync_wait.
665 */ 1086 */
666void sync_inodes_sb(struct super_block *sb, int wait) 1087static void wait_sb_inodes(struct writeback_control *wbc)
1088{
1089 struct inode *inode, *old_inode = NULL;
1090
1091 /*
1092 * We need to be protected against the filesystem going from
1093 * r/o to r/w or vice versa.
1094 */
1095 WARN_ON(!rwsem_is_locked(&wbc->sb->s_umount));
1096
1097 spin_lock(&inode_lock);
1098
1099 /*
1100 * Data integrity sync. Must wait for all pages under writeback,
1101 * because there may have been pages dirtied before our sync
1102 * call, but which had writeout started before we write it out.
1103 * In which case, the inode may not be on the dirty list, but
1104 * we still have to wait for that writeout.
1105 */
1106 list_for_each_entry(inode, &wbc->sb->s_inodes, i_sb_list) {
1107 struct address_space *mapping;
1108
1109 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW))
1110 continue;
1111 mapping = inode->i_mapping;
1112 if (mapping->nrpages == 0)
1113 continue;
1114 __iget(inode);
1115 spin_unlock(&inode_lock);
1116 /*
1117 * We hold a reference to 'inode' so it couldn't have
1118 * been removed from s_inodes list while we dropped the
1119 * inode_lock. We cannot iput the inode now as we can
1120 * be holding the last reference and we cannot iput it
1121 * under inode_lock. So we keep the reference and iput
1122 * it later.
1123 */
1124 iput(old_inode);
1125 old_inode = inode;
1126
1127 filemap_fdatawait(mapping);
1128
1129 cond_resched();
1130
1131 spin_lock(&inode_lock);
1132 }
1133 spin_unlock(&inode_lock);
1134 iput(old_inode);
1135}
1136
1137/**
1138 * writeback_inodes_sb - writeback dirty inodes from given super_block
1139 * @sb: the superblock
1140 *
1141 * Start writeback on some inodes on this super_block. No guarantees are made
1142 * on how many (if any) will be written, and this function does not wait
1143 * for IO completion of submitted IO. The number of pages submitted is
1144 * returned.
1145 */
1146long writeback_inodes_sb(struct super_block *sb)
667{ 1147{
668 struct writeback_control wbc = { 1148 struct writeback_control wbc = {
669 .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, 1149 .sb = sb,
1150 .sync_mode = WB_SYNC_NONE,
670 .range_start = 0, 1151 .range_start = 0,
671 .range_end = LLONG_MAX, 1152 .range_end = LLONG_MAX,
672 }; 1153 };
1154 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
1155 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
1156 long nr_to_write;
673 1157
674 if (!wait) { 1158 nr_to_write = nr_dirty + nr_unstable +
675 unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
676 unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
677
678 wbc.nr_to_write = nr_dirty + nr_unstable +
679 (inodes_stat.nr_inodes - inodes_stat.nr_unused); 1159 (inodes_stat.nr_inodes - inodes_stat.nr_unused);
680 } else
681 wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */
682 1160
683 sync_sb_inodes(sb, &wbc); 1161 wbc.nr_to_write = nr_to_write;
1162 bdi_writeback_all(&wbc);
1163 return nr_to_write - wbc.nr_to_write;
1164}
1165EXPORT_SYMBOL(writeback_inodes_sb);
1166
1167/**
1168 * sync_inodes_sb - sync sb inode pages
1169 * @sb: the superblock
1170 *
1171 * This function writes and waits on any dirty inode belonging to this
1172 * super_block. The number of pages synced is returned.
1173 */
1174long sync_inodes_sb(struct super_block *sb)
1175{
1176 struct writeback_control wbc = {
1177 .sb = sb,
1178 .sync_mode = WB_SYNC_ALL,
1179 .range_start = 0,
1180 .range_end = LLONG_MAX,
1181 };
1182 long nr_to_write = LONG_MAX; /* doesn't actually matter */
1183
1184 wbc.nr_to_write = nr_to_write;
1185 bdi_writeback_all(&wbc);
1186 wait_sb_inodes(&wbc);
1187 return nr_to_write - wbc.nr_to_write;
684} 1188}
1189EXPORT_SYMBOL(sync_inodes_sb);
685 1190
686/** 1191/**
687 * write_inode_now - write an inode to disk 1192 * write_inode_now - write an inode to disk
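The fs-writeback.c hunks above replace the old generic_sync_sb_inodes()/sync_sb_inodes(sb, wbc) entry points with two per-superblock helpers: writeback_inodes_sb() starts WB_SYNC_NONE writeback and returns without waiting, while the new sync_inodes_sb() pushes everything with WB_SYNC_ALL and then waits via wait_sb_inodes(). A minimal sketch of how a sync path might drive the pair, assuming only the signatures visible in this diff; the helper name and the two-pass structure are illustrative, not part of the patch:

	/* Illustrative only: the write-then-wait pattern implied by the hunks above. */
	#include <linux/fs.h>
	#include <linux/writeback.h>

	static long example_sync_filesystem(struct super_block *sb, int wait)
	{
		long nr = 0;

		/* Pass 1: kick off asynchronous writeback of dirty inodes. */
		nr += writeback_inodes_sb(sb);

		/* Pass 2: for data integrity, write and wait on everything. */
		if (wait)
			nr += sync_inodes_sb(sb);

		return nr;	/* pages pushed, per the return values above */
	}

Both helpers return the number of pages they submitted, so a caller interested in progress can sum the return values instead of ignoring them.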
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index f91ccc4a189d..4567db6f9430 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -801,6 +801,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
801{ 801{
802 int err; 802 int err;
803 803
804 fc->bdi.name = "fuse";
804 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; 805 fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
805 fc->bdi.unplug_io_fn = default_unplug_io_fn; 806 fc->bdi.unplug_io_fn = default_unplug_io_fn;
806 /* fuse does it's own writeback accounting */ 807 /* fuse does it's own writeback accounting */
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cb88dac8ccaa..a93b885311d8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -44,6 +44,7 @@ static const struct inode_operations hugetlbfs_dir_inode_operations;
44static const struct inode_operations hugetlbfs_inode_operations; 44static const struct inode_operations hugetlbfs_inode_operations;
45 45
46static struct backing_dev_info hugetlbfs_backing_dev_info = { 46static struct backing_dev_info hugetlbfs_backing_dev_info = {
47 .name = "hugetlbfs",
47 .ra_pages = 0, /* No readahead */ 48 .ra_pages = 0, /* No readahead */
48 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 49 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
49}; 50};
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 8fcb6239218e..7edb62e97419 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -258,7 +258,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
258 return rc; 258 return rc;
259} 259}
260 260
261static int jffs2_check_acl(struct inode *inode, int mask) 261int jffs2_check_acl(struct inode *inode, int mask)
262{ 262{
263 struct posix_acl *acl; 263 struct posix_acl *acl;
264 int rc; 264 int rc;
@@ -274,11 +274,6 @@ static int jffs2_check_acl(struct inode *inode, int mask)
274 return -EAGAIN; 274 return -EAGAIN;
275} 275}
276 276
277int jffs2_permission(struct inode *inode, int mask)
278{
279 return generic_permission(inode, mask, jffs2_check_acl);
280}
281
282int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) 277int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
283{ 278{
284 struct posix_acl *acl, *clone; 279 struct posix_acl *acl, *clone;
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index fc929f2a14f6..f0ba63e3c36b 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29extern int jffs2_permission(struct inode *, int); 29extern int jffs2_check_acl(struct inode *, int);
30extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
32extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
@@ -36,7 +36,7 @@ extern struct xattr_handler jffs2_acl_default_xattr_handler;
36 36
37#else 37#else
38 38
39#define jffs2_permission (NULL) 39#define jffs2_check_acl (NULL)
40#define jffs2_acl_chmod(inode) (0) 40#define jffs2_acl_chmod(inode) (0)
41#define jffs2_init_acl_pre(dir_i,inode,mode) (0) 41#define jffs2_init_acl_pre(dir_i,inode,mode) (0)
42#define jffs2_init_acl_post(inode) (0) 42#define jffs2_init_acl_post(inode) (0)
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 6f60cc910f4c..7aa4417e085f 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -55,7 +55,7 @@ const struct inode_operations jffs2_dir_inode_operations =
55 .rmdir = jffs2_rmdir, 55 .rmdir = jffs2_rmdir,
56 .mknod = jffs2_mknod, 56 .mknod = jffs2_mknod,
57 .rename = jffs2_rename, 57 .rename = jffs2_rename,
58 .permission = jffs2_permission, 58 .check_acl = jffs2_check_acl,
59 .setattr = jffs2_setattr, 59 .setattr = jffs2_setattr,
60 .setxattr = jffs2_setxattr, 60 .setxattr = jffs2_setxattr,
61 .getxattr = jffs2_getxattr, 61 .getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 23c947539864..b7b74e299142 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -56,7 +56,7 @@ const struct file_operations jffs2_file_operations =
56 56
57const struct inode_operations jffs2_file_inode_operations = 57const struct inode_operations jffs2_file_inode_operations =
58{ 58{
59 .permission = jffs2_permission, 59 .check_acl = jffs2_check_acl,
60 .setattr = jffs2_setattr, 60 .setattr = jffs2_setattr,
61 .setxattr = jffs2_setxattr, 61 .setxattr = jffs2_setxattr,
62 .getxattr = jffs2_getxattr, 62 .getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index b7339c3b6ad9..4ec11e8bda8c 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -21,7 +21,7 @@ const struct inode_operations jffs2_symlink_inode_operations =
21{ 21{
22 .readlink = generic_readlink, 22 .readlink = generic_readlink,
23 .follow_link = jffs2_follow_link, 23 .follow_link = jffs2_follow_link,
24 .permission = jffs2_permission, 24 .check_acl = jffs2_check_acl,
25 .setattr = jffs2_setattr, 25 .setattr = jffs2_setattr,
26 .setxattr = jffs2_setxattr, 26 .setxattr = jffs2_setxattr,
27 .getxattr = jffs2_getxattr, 27 .getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index d9a721e6db70..5ef7bac265e5 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1268,10 +1268,20 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) {
1268 if (!c->wbuf) 1268 if (!c->wbuf)
1269 return -ENOMEM; 1269 return -ENOMEM;
1270 1270
1271#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
1272 c->wbuf_verify = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1273 if (!c->wbuf_verify) {
1274 kfree(c->wbuf);
1275 return -ENOMEM;
1276 }
1277#endif
1271 return 0; 1278 return 0;
1272} 1279}
1273 1280
1274void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) { 1281void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) {
1282#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
1283 kfree(c->wbuf_verify);
1284#endif
1275 kfree(c->wbuf); 1285 kfree(c->wbuf);
1276} 1286}
1277 1287
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index a29c7c3e3fb8..d66477c34306 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,7 +114,7 @@ out:
114 return rc; 114 return rc;
115} 115}
116 116
117static int jfs_check_acl(struct inode *inode, int mask) 117int jfs_check_acl(struct inode *inode, int mask)
118{ 118{
119 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 119 struct posix_acl *acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
120 120
@@ -129,11 +129,6 @@ static int jfs_check_acl(struct inode *inode, int mask)
129 return -EAGAIN; 129 return -EAGAIN;
130} 130}
131 131
132int jfs_permission(struct inode *inode, int mask)
133{
134 return generic_permission(inode, mask, jfs_check_acl);
135}
136
137int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) 132int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir)
138{ 133{
139 struct posix_acl *acl = NULL; 134 struct posix_acl *acl = NULL;
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 7f6063acaa3b..2b70fa78e4a7 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -96,7 +96,7 @@ const struct inode_operations jfs_file_inode_operations = {
96 .removexattr = jfs_removexattr, 96 .removexattr = jfs_removexattr,
97#ifdef CONFIG_JFS_POSIX_ACL 97#ifdef CONFIG_JFS_POSIX_ACL
98 .setattr = jfs_setattr, 98 .setattr = jfs_setattr,
99 .permission = jfs_permission, 99 .check_acl = jfs_check_acl,
100#endif 100#endif
101}; 101};
102 102
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 88475f10a389..b07bd417ef85 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23int jfs_permission(struct inode *, int); 23int jfs_check_acl(struct inode *, int);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_setattr(struct dentry *, struct iattr *); 25int jfs_setattr(struct dentry *, struct iattr *);
26 26
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 514ee2edb92a..c79a4270f083 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1543,7 +1543,7 @@ const struct inode_operations jfs_dir_inode_operations = {
1543 .removexattr = jfs_removexattr, 1543 .removexattr = jfs_removexattr,
1544#ifdef CONFIG_JFS_POSIX_ACL 1544#ifdef CONFIG_JFS_POSIX_ACL
1545 .setattr = jfs_setattr, 1545 .setattr = jfs_setattr,
1546 .permission = jfs_permission, 1546 .check_acl = jfs_check_acl,
1547#endif 1547#endif
1548}; 1548};
1549 1549
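The jffs2 and jfs hunks above drop their ->permission wrappers and expose the ACL check through the new ->check_acl inode operation instead, returning -EAGAIN when no ACL applies so that generic_permission() falls back to the ordinary mode bits. A hedged sketch of the resulting filesystem-side pattern; the myfs_* names and the myfs_get_acl() helper are made up for illustration, while posix_acl_permission() and posix_acl_release() are the real helpers used by jfs above:

	static int myfs_check_acl(struct inode *inode, int mask)
	{
		/* myfs_get_acl() is a hypothetical per-fs ACL lookup helper. */
		struct posix_acl *acl = myfs_get_acl(inode, ACL_TYPE_ACCESS);

		if (IS_ERR(acl))
			return PTR_ERR(acl);
		if (acl) {
			int error = posix_acl_permission(inode, acl, mask);
			posix_acl_release(acl);
			return error;
		}
		return -EAGAIN;	/* no ACL: let the mode bits decide */
	}

	static const struct inode_operations myfs_file_inode_operations = {
		.check_acl	= myfs_check_acl,
		/* .setattr, xattr handlers, etc. as before */
	};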
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 99d737bd4325..7cb076ac6b45 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -87,18 +87,6 @@ static unsigned int nlm_hash_address(const struct sockaddr *sap)
87 return hash & (NLM_HOST_NRHASH - 1); 87 return hash & (NLM_HOST_NRHASH - 1);
88} 88}
89 89
90static void nlm_clear_port(struct sockaddr *sap)
91{
92 switch (sap->sa_family) {
93 case AF_INET:
94 ((struct sockaddr_in *)sap)->sin_port = 0;
95 break;
96 case AF_INET6:
97 ((struct sockaddr_in6 *)sap)->sin6_port = 0;
98 break;
99 }
100}
101
102/* 90/*
103 * Common host lookup routine for server & client 91 * Common host lookup routine for server & client
104 */ 92 */
@@ -177,7 +165,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
177 host->h_addrbuf = nsm->sm_addrbuf; 165 host->h_addrbuf = nsm->sm_addrbuf;
178 memcpy(nlm_addr(host), ni->sap, ni->salen); 166 memcpy(nlm_addr(host), ni->sap, ni->salen);
179 host->h_addrlen = ni->salen; 167 host->h_addrlen = ni->salen;
180 nlm_clear_port(nlm_addr(host)); 168 rpc_set_port(nlm_addr(host), 0);
181 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); 169 memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
182 host->h_version = ni->version; 170 host->h_version = ni->version;
183 host->h_proto = ni->protocol; 171 host->h_proto = ni->protocol;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 7fce1b525849..30c933188dd7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,43 +61,6 @@ static inline struct sockaddr *nsm_addr(const struct nsm_handle *nsm)
61 return (struct sockaddr *)&nsm->sm_addr; 61 return (struct sockaddr *)&nsm->sm_addr;
62} 62}
63 63
64static void nsm_display_ipv4_address(const struct sockaddr *sap, char *buf,
65 const size_t len)
66{
67 const struct sockaddr_in *sin = (struct sockaddr_in *)sap;
68 snprintf(buf, len, "%pI4", &sin->sin_addr.s_addr);
69}
70
71static void nsm_display_ipv6_address(const struct sockaddr *sap, char *buf,
72 const size_t len)
73{
74 const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
75
76 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
77 snprintf(buf, len, "%pI4", &sin6->sin6_addr.s6_addr32[3]);
78 else if (sin6->sin6_scope_id != 0)
79 snprintf(buf, len, "%pI6%%%u", &sin6->sin6_addr,
80 sin6->sin6_scope_id);
81 else
82 snprintf(buf, len, "%pI6", &sin6->sin6_addr);
83}
84
85static void nsm_display_address(const struct sockaddr *sap,
86 char *buf, const size_t len)
87{
88 switch (sap->sa_family) {
89 case AF_INET:
90 nsm_display_ipv4_address(sap, buf, len);
91 break;
92 case AF_INET6:
93 nsm_display_ipv6_address(sap, buf, len);
94 break;
95 default:
96 snprintf(buf, len, "unsupported address family");
97 break;
98 }
99}
100
101static struct rpc_clnt *nsm_create(void) 64static struct rpc_clnt *nsm_create(void)
102{ 65{
103 struct sockaddr_in sin = { 66 struct sockaddr_in sin = {
@@ -307,8 +270,11 @@ static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
307 memcpy(nsm_addr(new), sap, salen); 270 memcpy(nsm_addr(new), sap, salen);
308 new->sm_addrlen = salen; 271 new->sm_addrlen = salen;
309 nsm_init_private(new); 272 nsm_init_private(new);
310 nsm_display_address((const struct sockaddr *)&new->sm_addr, 273
311 new->sm_addrbuf, sizeof(new->sm_addrbuf)); 274 if (rpc_ntop(nsm_addr(new), new->sm_addrbuf,
275 sizeof(new->sm_addrbuf)) == 0)
276 (void)snprintf(new->sm_addrbuf, sizeof(new->sm_addrbuf),
277 "unsupported address family");
312 memcpy(new->sm_name, hostname, hostname_len); 278 memcpy(new->sm_name, hostname, hostname_len);
313 new->sm_name[hostname_len] = '\0'; 279 new->sm_name[hostname_len] = '\0';
314 280
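The mon.c hunk replaces the hand-rolled nsm_display_*_address() helpers with rpc_ntop(), falling back to a fixed string when the address family cannot be printed. A condensed restatement of that fallback pattern as a helper; the function name is illustrative, and rpc_ntop() is used exactly as in the hunk above:

	static void example_format_addr(const struct sockaddr *sap,
					char *buf, size_t buflen)
	{
		/* rpc_ntop() returns 0 when it cannot format the address */
		if (rpc_ntop(sap, buf, buflen) == 0)
			snprintf(buf, buflen, "unsupported address family");
	}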
diff --git a/fs/locks.c b/fs/locks.c
index b6440f52178f..19ee18a6829b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -768,7 +768,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
768 * give it the opportunity to lock the file. 768 * give it the opportunity to lock the file.
769 */ 769 */
770 if (found) 770 if (found)
771 cond_resched_bkl(); 771 cond_resched();
772 772
773find_conflict: 773find_conflict:
774 for_each_lock(inode, before) { 774 for_each_lock(inode, before) {
@@ -1591,7 +1591,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
1591 if (can_sleep) 1591 if (can_sleep)
1592 lock->fl_flags |= FL_SLEEP; 1592 lock->fl_flags |= FL_SLEEP;
1593 1593
1594 error = security_file_lock(filp, cmd); 1594 error = security_file_lock(filp, lock->fl_type);
1595 if (error) 1595 if (error)
1596 goto out_free; 1596 goto out_free;
1597 1597
diff --git a/fs/namei.c b/fs/namei.c
index f3c5b278895a..d11f404667e9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -169,19 +169,10 @@ void putname(const char *name)
169EXPORT_SYMBOL(putname); 169EXPORT_SYMBOL(putname);
170#endif 170#endif
171 171
172 172/*
173/** 173 * This does basic POSIX ACL permission checking
174 * generic_permission - check for access rights on a Posix-like filesystem
175 * @inode: inode to check access rights for
176 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
177 * @check_acl: optional callback to check for Posix ACLs
178 *
179 * Used to check for read/write/execute permissions on a file.
180 * We use "fsuid" for this, letting us set arbitrary permissions
181 * for filesystem access without changing the "normal" uids which
182 * are used for other things..
183 */ 174 */
184int generic_permission(struct inode *inode, int mask, 175static int acl_permission_check(struct inode *inode, int mask,
185 int (*check_acl)(struct inode *inode, int mask)) 176 int (*check_acl)(struct inode *inode, int mask))
186{ 177{
187 umode_t mode = inode->i_mode; 178 umode_t mode = inode->i_mode;
@@ -193,9 +184,7 @@ int generic_permission(struct inode *inode, int mask,
193 else { 184 else {
194 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 185 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
195 int error = check_acl(inode, mask); 186 int error = check_acl(inode, mask);
196 if (error == -EACCES) 187 if (error != -EAGAIN)
197 goto check_capabilities;
198 else if (error != -EAGAIN)
199 return error; 188 return error;
200 } 189 }
201 190
@@ -208,8 +197,32 @@ int generic_permission(struct inode *inode, int mask,
208 */ 197 */
209 if ((mask & ~mode) == 0) 198 if ((mask & ~mode) == 0)
210 return 0; 199 return 0;
200 return -EACCES;
201}
202
203/**
204 * generic_permission - check for access rights on a Posix-like filesystem
205 * @inode: inode to check access rights for
206 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
207 * @check_acl: optional callback to check for Posix ACLs
208 *
209 * Used to check for read/write/execute permissions on a file.
210 * We use "fsuid" for this, letting us set arbitrary permissions
211 * for filesystem access without changing the "normal" uids which
212 * are used for other things..
213 */
214int generic_permission(struct inode *inode, int mask,
215 int (*check_acl)(struct inode *inode, int mask))
216{
217 int ret;
218
219 /*
220 * Do the basic POSIX ACL permission checks.
221 */
222 ret = acl_permission_check(inode, mask, check_acl);
223 if (ret != -EACCES)
224 return ret;
211 225
212 check_capabilities:
213 /* 226 /*
214 * Read/write DACs are always overridable. 227 * Read/write DACs are always overridable.
215 * Executable DACs are overridable if at least one exec bit is set. 228 * Executable DACs are overridable if at least one exec bit is set.
@@ -262,7 +275,7 @@ int inode_permission(struct inode *inode, int mask)
262 if (inode->i_op->permission) 275 if (inode->i_op->permission)
263 retval = inode->i_op->permission(inode, mask); 276 retval = inode->i_op->permission(inode, mask);
264 else 277 else
265 retval = generic_permission(inode, mask, NULL); 278 retval = generic_permission(inode, mask, inode->i_op->check_acl);
266 279
267 if (retval) 280 if (retval)
268 return retval; 281 return retval;
@@ -432,29 +445,22 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
432 */ 445 */
433static int exec_permission_lite(struct inode *inode) 446static int exec_permission_lite(struct inode *inode)
434{ 447{
435 umode_t mode = inode->i_mode; 448 int ret;
436 449
437 if (inode->i_op->permission) 450 if (inode->i_op->permission) {
438 return -EAGAIN; 451 ret = inode->i_op->permission(inode, MAY_EXEC);
439 452 if (!ret)
440 if (current_fsuid() == inode->i_uid) 453 goto ok;
441 mode >>= 6; 454 return ret;
442 else if (in_group_p(inode->i_gid)) 455 }
443 mode >>= 3; 456 ret = acl_permission_check(inode, MAY_EXEC, inode->i_op->check_acl);
444 457 if (!ret)
445 if (mode & MAY_EXEC)
446 goto ok;
447
448 if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE))
449 goto ok;
450
451 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE))
452 goto ok; 458 goto ok;
453 459
454 if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) 460 if (capable(CAP_DAC_OVERRIDE) || capable(CAP_DAC_READ_SEARCH))
455 goto ok; 461 goto ok;
456 462
457 return -EACCES; 463 return ret;
458ok: 464ok:
459 return security_inode_permission(inode, MAY_EXEC); 465 return security_inode_permission(inode, MAY_EXEC);
460} 466}
@@ -853,12 +859,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
853 859
854 nd->flags |= LOOKUP_CONTINUE; 860 nd->flags |= LOOKUP_CONTINUE;
855 err = exec_permission_lite(inode); 861 err = exec_permission_lite(inode);
856 if (err == -EAGAIN)
857 err = inode_permission(nd->path.dentry->d_inode,
858 MAY_EXEC);
859 if (!err)
860 err = ima_path_check(&nd->path, MAY_EXEC,
861 IMA_COUNT_UPDATE);
862 if (err) 862 if (err)
863 break; 863 break;
864 864
@@ -1533,37 +1533,42 @@ int may_open(struct path *path, int acc_mode, int flag)
1533 if (error) 1533 if (error)
1534 return error; 1534 return error;
1535 1535
1536 error = ima_path_check(path, 1536 error = ima_path_check(path, acc_mode ?
1537 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC), 1537 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
1538 ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
1538 IMA_COUNT_UPDATE); 1539 IMA_COUNT_UPDATE);
1540
1539 if (error) 1541 if (error)
1540 return error; 1542 return error;
1541 /* 1543 /*
1542 * An append-only file must be opened in append mode for writing. 1544 * An append-only file must be opened in append mode for writing.
1543 */ 1545 */
1544 if (IS_APPEND(inode)) { 1546 if (IS_APPEND(inode)) {
1547 error = -EPERM;
1545 if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) 1548 if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1546 return -EPERM; 1549 goto err_out;
1547 if (flag & O_TRUNC) 1550 if (flag & O_TRUNC)
1548 return -EPERM; 1551 goto err_out;
1549 } 1552 }
1550 1553
1551 /* O_NOATIME can only be set by the owner or superuser */ 1554 /* O_NOATIME can only be set by the owner or superuser */
1552 if (flag & O_NOATIME) 1555 if (flag & O_NOATIME)
1553 if (!is_owner_or_cap(inode)) 1556 if (!is_owner_or_cap(inode)) {
1554 return -EPERM; 1557 error = -EPERM;
1558 goto err_out;
1559 }
1555 1560
1556 /* 1561 /*
1557 * Ensure there are no outstanding leases on the file. 1562 * Ensure there are no outstanding leases on the file.
1558 */ 1563 */
1559 error = break_lease(inode, flag); 1564 error = break_lease(inode, flag);
1560 if (error) 1565 if (error)
1561 return error; 1566 goto err_out;
1562 1567
1563 if (flag & O_TRUNC) { 1568 if (flag & O_TRUNC) {
1564 error = get_write_access(inode); 1569 error = get_write_access(inode);
1565 if (error) 1570 if (error)
1566 return error; 1571 goto err_out;
1567 1572
1568 /* 1573 /*
1569 * Refuse to truncate files with mandatory locks held on them. 1574 * Refuse to truncate files with mandatory locks held on them.
@@ -1581,12 +1586,17 @@ int may_open(struct path *path, int acc_mode, int flag)
1581 } 1586 }
1582 put_write_access(inode); 1587 put_write_access(inode);
1583 if (error) 1588 if (error)
1584 return error; 1589 goto err_out;
1585 } else 1590 } else
1586 if (flag & FMODE_WRITE) 1591 if (flag & FMODE_WRITE)
1587 vfs_dq_init(inode); 1592 vfs_dq_init(inode);
1588 1593
1589 return 0; 1594 return 0;
1595err_out:
1596 ima_counts_put(path, acc_mode ?
1597 acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
1598 ACC_MODE(flag) & (MAY_READ | MAY_WRITE));
1599 return error;
1590} 1600}
1591 1601
1592/* 1602/*
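With the namei.c changes above, a filesystem no longer needs a ->permission method just to feed its ACL checker into generic_permission(): inode_permission() now passes i_op->check_acl itself, and exec_permission_lite() reuses the same acl_permission_check() helper instead of duplicating the mode-bit logic. A condensed sketch of the resulting call path; it deliberately omits the devcgroup and write-protection checks that the full inode_permission() also performs:

	/* Condensed from the inode_permission()/generic_permission() hunks above. */
	static int example_inode_permission(struct inode *inode, int mask)
	{
		int retval;

		if (inode->i_op->permission)
			/* filesystem still wants full control */
			retval = inode->i_op->permission(inode, mask);
		else
			/* common case: mode bits plus the optional ->check_acl hook */
			retval = generic_permission(inode, mask,
						    inode->i_op->check_acl);
		if (retval)
			return retval;

		return security_inode_permission(inode, mask);
	}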
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 845159814de2..da7fda639eac 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,7 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
6 6
7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \ 7nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \ 8 direct.o pagelist.o proc.o read.o symlink.o unlink.o \
9 write.o namespace.o mount_clnt.o 9 write.o namespace.o mount_clnt.o \
10 dns_resolve.o cache_lib.o
10nfs-$(CONFIG_ROOT_NFS) += nfsroot.o 11nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
11nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o 12nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
12nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o 13nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
new file mode 100644
index 000000000000..b4ffd0146ea6
--- /dev/null
+++ b/fs/nfs/cache_lib.c
@@ -0,0 +1,140 @@
1/*
2 * linux/fs/nfs/cache_lib.c
3 *
4 * Helper routines for the NFS client caches
5 *
6 * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
7 */
8#include <linux/kmod.h>
9#include <linux/module.h>
10#include <linux/moduleparam.h>
11#include <linux/mount.h>
12#include <linux/namei.h>
13#include <linux/sunrpc/cache.h>
14#include <linux/sunrpc/rpc_pipe_fs.h>
15
16#include "cache_lib.h"
17
18#define NFS_CACHE_UPCALL_PATHLEN 256
19#define NFS_CACHE_UPCALL_TIMEOUT 15
20
21static char nfs_cache_getent_prog[NFS_CACHE_UPCALL_PATHLEN] =
22 "/sbin/nfs_cache_getent";
23static unsigned long nfs_cache_getent_timeout = NFS_CACHE_UPCALL_TIMEOUT;
24
25module_param_string(cache_getent, nfs_cache_getent_prog,
26 sizeof(nfs_cache_getent_prog), 0600);
27MODULE_PARM_DESC(cache_getent, "Path to the client cache upcall program");
28module_param_named(cache_getent_timeout, nfs_cache_getent_timeout, ulong, 0600);
29MODULE_PARM_DESC(cache_getent_timeout, "Timeout (in seconds) after which "
30 "the cache upcall is assumed to have failed");
31
32int nfs_cache_upcall(struct cache_detail *cd, char *entry_name)
33{
34 static char *envp[] = { "HOME=/",
35 "TERM=linux",
36 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
37 NULL
38 };
39 char *argv[] = {
40 nfs_cache_getent_prog,
41 cd->name,
42 entry_name,
43 NULL
44 };
45 int ret = -EACCES;
46
47 if (nfs_cache_getent_prog[0] == '\0')
48 goto out;
49 ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
50 /*
51 * Disable the upcall mechanism if we're getting an ENOENT or
52 * EACCES error. The admin can re-enable it on the fly by using
53 * sysfs to set the 'cache_getent' parameter once the problem
54 * has been fixed.
55 */
56 if (ret == -ENOENT || ret == -EACCES)
57 nfs_cache_getent_prog[0] = '\0';
58out:
59 return ret > 0 ? 0 : ret;
60}
61
62/*
63 * Deferred request handling
64 */
65void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq)
66{
67 if (atomic_dec_and_test(&dreq->count))
68 kfree(dreq);
69}
70
71static void nfs_dns_cache_revisit(struct cache_deferred_req *d, int toomany)
72{
73 struct nfs_cache_defer_req *dreq;
74
75 dreq = container_of(d, struct nfs_cache_defer_req, deferred_req);
76
77 complete_all(&dreq->completion);
78 nfs_cache_defer_req_put(dreq);
79}
80
81static struct cache_deferred_req *nfs_dns_cache_defer(struct cache_req *req)
82{
83 struct nfs_cache_defer_req *dreq;
84
85 dreq = container_of(req, struct nfs_cache_defer_req, req);
86 dreq->deferred_req.revisit = nfs_dns_cache_revisit;
87 atomic_inc(&dreq->count);
88
89 return &dreq->deferred_req;
90}
91
92struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void)
93{
94 struct nfs_cache_defer_req *dreq;
95
96 dreq = kzalloc(sizeof(*dreq), GFP_KERNEL);
97 if (dreq) {
98 init_completion(&dreq->completion);
99 atomic_set(&dreq->count, 1);
100 dreq->req.defer = nfs_dns_cache_defer;
101 }
102 return dreq;
103}
104
105int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
106{
107 if (wait_for_completion_timeout(&dreq->completion,
108 nfs_cache_getent_timeout * HZ) == 0)
109 return -ETIMEDOUT;
110 return 0;
111}
112
113int nfs_cache_register(struct cache_detail *cd)
114{
115 struct nameidata nd;
116 struct vfsmount *mnt;
117 int ret;
118
119 mnt = rpc_get_mount();
120 if (IS_ERR(mnt))
121 return PTR_ERR(mnt);
122 ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd);
123 if (ret)
124 goto err;
125 ret = sunrpc_cache_register_pipefs(nd.path.dentry,
126 cd->name, 0600, cd);
127 path_put(&nd.path);
128 if (!ret)
129 return ret;
130err:
131 rpc_put_mount();
132 return ret;
133}
134
135void nfs_cache_unregister(struct cache_detail *cd)
136{
137 sunrpc_cache_unregister_pipefs(cd);
138 rpc_put_mount();
139}
140
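cache_lib.c above supplies the deferred-request plumbing (nfs_cache_defer_req_alloc()/put(), nfs_cache_wait_for_upcall()) that the DNS resolver further down builds its blocking lookup on. A compressed sketch of the intended calling sequence; do_lookup() and do_lookup_nowait() stand in for cache_check()-based lookups against a specific cache_detail, as in do_cache_lookup_wait() below:

	static int example_lookup_and_wait(struct cache_detail *cd, void *key)
	{
		struct nfs_cache_defer_req *dreq;
		int ret;

		dreq = nfs_cache_defer_req_alloc();
		if (!dreq)
			return -ENOMEM;

		ret = do_lookup(cd, key, &dreq->req);	/* may defer: -EAGAIN */
		if (ret == -EAGAIN) {
			/* wait for the upcall to complete, or time out */
			ret = nfs_cache_wait_for_upcall(dreq);
			if (!ret)
				ret = do_lookup_nowait(cd, key);
		}
		nfs_cache_defer_req_put(dreq);
		return ret;
	}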
diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h
new file mode 100644
index 000000000000..76f856e284e4
--- /dev/null
+++ b/fs/nfs/cache_lib.h
@@ -0,0 +1,27 @@
1/*
2 * Helper routines for the NFS client caches
3 *
4 * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
5 */
6
7#include <linux/completion.h>
8#include <linux/sunrpc/cache.h>
9#include <asm/atomic.h>
10
11/*
12 * Deferred request handling
13 */
14struct nfs_cache_defer_req {
15 struct cache_req req;
16 struct cache_deferred_req deferred_req;
17 struct completion completion;
18 atomic_t count;
19};
20
21extern int nfs_cache_upcall(struct cache_detail *cd, char *entry_name);
22extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void);
23extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq);
24extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq);
25
26extern int nfs_cache_register(struct cache_detail *cd);
27extern void nfs_cache_unregister(struct cache_detail *cd);
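The header above exports nfs_cache_register()/nfs_cache_unregister() for attaching a cache_detail to the rpc_pipefs "cache" directory. A hedged sketch of how a cache is typically brought up and torn down with them, mirroring nfs_dns_resolver_init()/destroy() later in this patch set; the example_* names are illustrative:

	static struct cache_head *example_table[16];

	static struct cache_detail example_cache = {
		.owner		= THIS_MODULE,
		.hash_size	= 16,
		.hash_table	= example_table,
		.name		= "example",
		/* .cache_put, .cache_upcall, .cache_parse, .cache_show, ... */
	};

	static int __init example_cache_init(void)
	{
		return nfs_cache_register(&example_cache);
	}

	static void __exit example_cache_exit(void)
	{
		nfs_cache_unregister(&example_cache);
	}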
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 7f604c7941fb..293fa0528a6e 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -43,21 +43,29 @@ static struct svc_program nfs4_callback_program;
43unsigned int nfs_callback_set_tcpport; 43unsigned int nfs_callback_set_tcpport;
44unsigned short nfs_callback_tcpport; 44unsigned short nfs_callback_tcpport;
45unsigned short nfs_callback_tcpport6; 45unsigned short nfs_callback_tcpport6;
46static const int nfs_set_port_min = 0; 46#define NFS_CALLBACK_MAXPORTNR (65535U)
47static const int nfs_set_port_max = 65535;
48 47
49static int param_set_port(const char *val, struct kernel_param *kp) 48static int param_set_portnr(const char *val, struct kernel_param *kp)
50{ 49{
51 char *endp; 50 unsigned long num;
52 int num = simple_strtol(val, &endp, 0); 51 int ret;
53 if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max) 52
53 if (!val)
54 return -EINVAL;
55 ret = strict_strtoul(val, 0, &num);
56 if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR)
54 return -EINVAL; 57 return -EINVAL;
55 *((int *)kp->arg) = num; 58 *((unsigned int *)kp->arg) = num;
56 return 0; 59 return 0;
57} 60}
58 61
59module_param_call(callback_tcpport, param_set_port, param_get_int, 62static int param_get_portnr(char *buffer, struct kernel_param *kp)
60 &nfs_callback_set_tcpport, 0644); 63{
64 return param_get_uint(buffer, kp);
65}
66#define param_check_portnr(name, p) __param_check(name, p, unsigned int);
67
68module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
61 69
62/* 70/*
63 * This is the NFSv4 callback kernel thread. 71 * This is the NFSv4 callback kernel thread.
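The callback.c hunk swaps simple_strtol() plus hand-rolled bounds checks for strict_strtoul(), which rejects trailing junk, and caps the value at NFS_CALLBACK_MAXPORTNR. A small user-space analogue of the same validation rule (illustrative only, not kernel code), showing why the stricter parse matters for inputs like "204d8" or "-1":

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* Return 0 and store the port on success, -1 on any malformed input. */
	static int parse_portnr(const char *val, unsigned int *port)
	{
		char *end;
		unsigned long num;

		if (val == NULL || *val == '\0')
			return -1;
		errno = 0;
		num = strtoul(val, &end, 0);
		if (errno || *end != '\0' || num > 65535)
			return -1;	/* trailing junk, overflow or out of range */
		*port = (unsigned int)num;
		return 0;
	}

	int main(int argc, char **argv)
	{
		unsigned int port;

		if (argc > 1 && parse_portnr(argv[1], &port) == 0)
			printf("port %u\n", port);
		else
			printf("invalid port\n");
		return 0;
	}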
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8d25ccb2d51d..e350bd6a2334 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -809,6 +809,9 @@ static int nfs_init_server(struct nfs_server *server,
809 /* Initialise the client representation from the mount data */ 809 /* Initialise the client representation from the mount data */
810 server->flags = data->flags; 810 server->flags = data->flags;
811 server->options = data->options; 811 server->options = data->options;
812 server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
813 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
814 NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
812 815
813 if (data->rsize) 816 if (data->rsize)
814 server->rsize = nfs_block_size(data->rsize, NULL); 817 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -879,6 +882,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
879 server->rsize = NFS_MAX_FILE_IO_SIZE; 882 server->rsize = NFS_MAX_FILE_IO_SIZE;
880 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 883 server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
881 884
885 server->backing_dev_info.name = "nfs";
882 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; 886 server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
883 887
884 if (server->wsize > max_rpc_payload) 888 if (server->wsize > max_rpc_payload)
@@ -1074,10 +1078,6 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
1074 (unsigned long long) server->fsid.major, 1078 (unsigned long long) server->fsid.major,
1075 (unsigned long long) server->fsid.minor); 1079 (unsigned long long) server->fsid.minor);
1076 1080
1077 BUG_ON(!server->nfs_client);
1078 BUG_ON(!server->nfs_client->rpc_ops);
1079 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1080
1081 spin_lock(&nfs_client_lock); 1081 spin_lock(&nfs_client_lock);
1082 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); 1082 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1083 list_add_tail(&server->master_link, &nfs_volume_list); 1083 list_add_tail(&server->master_link, &nfs_volume_list);
@@ -1274,7 +1274,7 @@ static int nfs4_init_server(struct nfs_server *server,
1274 1274
1275 /* Initialise the client representation from the mount data */ 1275 /* Initialise the client representation from the mount data */
1276 server->flags = data->flags; 1276 server->flags = data->flags;
1277 server->caps |= NFS_CAP_ATOMIC_OPEN; 1277 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
1278 server->options = data->options; 1278 server->options = data->options;
1279 1279
1280 /* Get a client record */ 1280 /* Get a client record */
@@ -1359,10 +1359,6 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
1359 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) 1359 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1360 server->namelen = NFS4_MAXNAMLEN; 1360 server->namelen = NFS4_MAXNAMLEN;
1361 1361
1362 BUG_ON(!server->nfs_client);
1363 BUG_ON(!server->nfs_client->rpc_ops);
1364 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1365
1366 spin_lock(&nfs_client_lock); 1362 spin_lock(&nfs_client_lock);
1367 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks); 1363 list_add_tail(&server->client_link, &server->nfs_client->cl_superblocks);
1368 list_add_tail(&server->master_link, &nfs_volume_list); 1364 list_add_tail(&server->master_link, &nfs_volume_list);
@@ -1400,7 +1396,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1400 1396
1401 /* Initialise the client representation from the parent server */ 1397 /* Initialise the client representation from the parent server */
1402 nfs_server_copy_userdata(server, parent_server); 1398 nfs_server_copy_userdata(server, parent_server);
1403 server->caps |= NFS_CAP_ATOMIC_OPEN; 1399 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
1404 1400
1405 /* Get a client representation. 1401 /* Get a client representation.
1406 * Note: NFSv4 always uses TCP, */ 1402 * Note: NFSv4 always uses TCP, */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e4e089a8f294..6c3210099d51 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -934,9 +934,6 @@ out:
934 * back into its cache. We let the server do generic write 934 * back into its cache. We let the server do generic write
935 * parameter checking and report problems. 935 * parameter checking and report problems.
936 * 936 *
937 * We also avoid an unnecessary invocation of generic_osync_inode(),
938 * as it is fairly meaningless to sync the metadata of an NFS file.
939 *
940 * We eliminate local atime updates, see direct read above. 937 * We eliminate local atime updates, see direct read above.
941 * 938 *
942 * We avoid unnecessary page cache invalidations for normal cached 939 * We avoid unnecessary page cache invalidations for normal cached
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
new file mode 100644
index 000000000000..f4d54ba97cc6
--- /dev/null
+++ b/fs/nfs/dns_resolve.c
@@ -0,0 +1,335 @@
1/*
2 * linux/fs/nfs/dns_resolve.c
3 *
4 * Copyright (c) 2009 Trond Myklebust <Trond.Myklebust@netapp.com>
5 *
6 * Resolves DNS hostnames into valid ip addresses
7 */
8
9#include <linux/hash.h>
10#include <linux/string.h>
11#include <linux/kmod.h>
12#include <linux/module.h>
13#include <linux/socket.h>
14#include <linux/seq_file.h>
15#include <linux/inet.h>
16#include <linux/sunrpc/clnt.h>
17#include <linux/sunrpc/cache.h>
18#include <linux/sunrpc/svcauth.h>
19
20#include "dns_resolve.h"
21#include "cache_lib.h"
22
23#define NFS_DNS_HASHBITS 4
24#define NFS_DNS_HASHTBL_SIZE (1 << NFS_DNS_HASHBITS)
25
26static struct cache_head *nfs_dns_table[NFS_DNS_HASHTBL_SIZE];
27
28struct nfs_dns_ent {
29 struct cache_head h;
30
31 char *hostname;
32 size_t namelen;
33
34 struct sockaddr_storage addr;
35 size_t addrlen;
36};
37
38
39static void nfs_dns_ent_init(struct cache_head *cnew,
40 struct cache_head *ckey)
41{
42 struct nfs_dns_ent *new;
43 struct nfs_dns_ent *key;
44
45 new = container_of(cnew, struct nfs_dns_ent, h);
46 key = container_of(ckey, struct nfs_dns_ent, h);
47
48 kfree(new->hostname);
49 new->hostname = kstrndup(key->hostname, key->namelen, GFP_KERNEL);
50 if (new->hostname) {
51 new->namelen = key->namelen;
52 memcpy(&new->addr, &key->addr, key->addrlen);
53 new->addrlen = key->addrlen;
54 } else {
55 new->namelen = 0;
56 new->addrlen = 0;
57 }
58}
59
60static void nfs_dns_ent_put(struct kref *ref)
61{
62 struct nfs_dns_ent *item;
63
64 item = container_of(ref, struct nfs_dns_ent, h.ref);
65 kfree(item->hostname);
66 kfree(item);
67}
68
69static struct cache_head *nfs_dns_ent_alloc(void)
70{
71 struct nfs_dns_ent *item = kmalloc(sizeof(*item), GFP_KERNEL);
72
73 if (item != NULL) {
74 item->hostname = NULL;
75 item->namelen = 0;
76 item->addrlen = 0;
77 return &item->h;
78 }
79 return NULL;
80};
81
82static unsigned int nfs_dns_hash(const struct nfs_dns_ent *key)
83{
84 return hash_str(key->hostname, NFS_DNS_HASHBITS);
85}
86
87static void nfs_dns_request(struct cache_detail *cd,
88 struct cache_head *ch,
89 char **bpp, int *blen)
90{
91 struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
92
93 qword_add(bpp, blen, key->hostname);
94 (*bpp)[-1] = '\n';
95}
96
97static int nfs_dns_upcall(struct cache_detail *cd,
98 struct cache_head *ch)
99{
100 struct nfs_dns_ent *key = container_of(ch, struct nfs_dns_ent, h);
101 int ret;
102
103 ret = nfs_cache_upcall(cd, key->hostname);
104 if (ret)
105 ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request);
106 return ret;
107}
108
109static int nfs_dns_match(struct cache_head *ca,
110 struct cache_head *cb)
111{
112 struct nfs_dns_ent *a;
113 struct nfs_dns_ent *b;
114
115 a = container_of(ca, struct nfs_dns_ent, h);
116 b = container_of(cb, struct nfs_dns_ent, h);
117
118 if (a->namelen == 0 || a->namelen != b->namelen)
119 return 0;
120 return memcmp(a->hostname, b->hostname, a->namelen) == 0;
121}
122
123static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
124 struct cache_head *h)
125{
126 struct nfs_dns_ent *item;
127 long ttl;
128
129 if (h == NULL) {
130 seq_puts(m, "# ip address hostname ttl\n");
131 return 0;
132 }
133 item = container_of(h, struct nfs_dns_ent, h);
134 ttl = (long)item->h.expiry_time - (long)get_seconds();
135 if (ttl < 0)
136 ttl = 0;
137
138 if (!test_bit(CACHE_NEGATIVE, &h->flags)) {
139 char buf[INET6_ADDRSTRLEN+IPV6_SCOPE_ID_LEN+1];
140
141 rpc_ntop((struct sockaddr *)&item->addr, buf, sizeof(buf));
142 seq_printf(m, "%15s ", buf);
143 } else
144 seq_puts(m, "<none> ");
145 seq_printf(m, "%15s %ld\n", item->hostname, ttl);
146 return 0;
147}
148
149struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd,
150 struct nfs_dns_ent *key)
151{
152 struct cache_head *ch;
153
154 ch = sunrpc_cache_lookup(cd,
155 &key->h,
156 nfs_dns_hash(key));
157 if (!ch)
158 return NULL;
159 return container_of(ch, struct nfs_dns_ent, h);
160}
161
162struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd,
163 struct nfs_dns_ent *new,
164 struct nfs_dns_ent *key)
165{
166 struct cache_head *ch;
167
168 ch = sunrpc_cache_update(cd,
169 &new->h, &key->h,
170 nfs_dns_hash(key));
171 if (!ch)
172 return NULL;
173 return container_of(ch, struct nfs_dns_ent, h);
174}
175
176static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
177{
178 char buf1[NFS_DNS_HOSTNAME_MAXLEN+1];
179 struct nfs_dns_ent key, *item;
180 unsigned long ttl;
181 ssize_t len;
182 int ret = -EINVAL;
183
184 if (buf[buflen-1] != '\n')
185 goto out;
186 buf[buflen-1] = '\0';
187
188 len = qword_get(&buf, buf1, sizeof(buf1));
189 if (len <= 0)
190 goto out;
191 key.addrlen = rpc_pton(buf1, len,
192 (struct sockaddr *)&key.addr,
193 sizeof(key.addr));
194
195 len = qword_get(&buf, buf1, sizeof(buf1));
196 if (len <= 0)
197 goto out;
198
199 key.hostname = buf1;
200 key.namelen = len;
201 memset(&key.h, 0, sizeof(key.h));
202
203 ttl = get_expiry(&buf);
204 if (ttl == 0)
205 goto out;
206 key.h.expiry_time = ttl + get_seconds();
207
208 ret = -ENOMEM;
209 item = nfs_dns_lookup(cd, &key);
210 if (item == NULL)
211 goto out;
212
213 if (key.addrlen == 0)
214 set_bit(CACHE_NEGATIVE, &key.h.flags);
215
216 item = nfs_dns_update(cd, &key, item);
217 if (item == NULL)
218 goto out;
219
220 ret = 0;
221 cache_put(&item->h, cd);
222out:
223 return ret;
224}
225
226static struct cache_detail nfs_dns_resolve = {
227 .owner = THIS_MODULE,
228 .hash_size = NFS_DNS_HASHTBL_SIZE,
229 .hash_table = nfs_dns_table,
230 .name = "dns_resolve",
231 .cache_put = nfs_dns_ent_put,
232 .cache_upcall = nfs_dns_upcall,
233 .cache_parse = nfs_dns_parse,
234 .cache_show = nfs_dns_show,
235 .match = nfs_dns_match,
236 .init = nfs_dns_ent_init,
237 .update = nfs_dns_ent_init,
238 .alloc = nfs_dns_ent_alloc,
239};
240
241static int do_cache_lookup(struct cache_detail *cd,
242 struct nfs_dns_ent *key,
243 struct nfs_dns_ent **item,
244 struct nfs_cache_defer_req *dreq)
245{
246 int ret = -ENOMEM;
247
248 *item = nfs_dns_lookup(cd, key);
249 if (*item) {
250 ret = cache_check(cd, &(*item)->h, &dreq->req);
251 if (ret)
252 *item = NULL;
253 }
254 return ret;
255}
256
257static int do_cache_lookup_nowait(struct cache_detail *cd,
258 struct nfs_dns_ent *key,
259 struct nfs_dns_ent **item)
260{
261 int ret = -ENOMEM;
262
263 *item = nfs_dns_lookup(cd, key);
264 if (!*item)
265 goto out_err;
266 ret = -ETIMEDOUT;
267 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
268 || (*item)->h.expiry_time < get_seconds()
269 || cd->flush_time > (*item)->h.last_refresh)
270 goto out_put;
271 ret = -ENOENT;
272 if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
273 goto out_put;
274 return 0;
275out_put:
276 cache_put(&(*item)->h, cd);
277out_err:
278 *item = NULL;
279 return ret;
280}
281
282static int do_cache_lookup_wait(struct cache_detail *cd,
283 struct nfs_dns_ent *key,
284 struct nfs_dns_ent **item)
285{
286 struct nfs_cache_defer_req *dreq;
287 int ret = -ENOMEM;
288
289 dreq = nfs_cache_defer_req_alloc();
290 if (!dreq)
291 goto out;
292 ret = do_cache_lookup(cd, key, item, dreq);
293 if (ret == -EAGAIN) {
294 ret = nfs_cache_wait_for_upcall(dreq);
295 if (!ret)
296 ret = do_cache_lookup_nowait(cd, key, item);
297 }
298 nfs_cache_defer_req_put(dreq);
299out:
300 return ret;
301}
302
303ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
304 struct sockaddr *sa, size_t salen)
305{
306 struct nfs_dns_ent key = {
307 .hostname = name,
308 .namelen = namelen,
309 };
310 struct nfs_dns_ent *item = NULL;
311 ssize_t ret;
312
313 ret = do_cache_lookup_wait(&nfs_dns_resolve, &key, &item);
314 if (ret == 0) {
315 if (salen >= item->addrlen) {
316 memcpy(sa, &item->addr, item->addrlen);
317 ret = item->addrlen;
318 } else
319 ret = -EOVERFLOW;
320 cache_put(&item->h, &nfs_dns_resolve);
321 } else if (ret == -ENOENT)
322 ret = -ESRCH;
323 return ret;
324}
325
326int nfs_dns_resolver_init(void)
327{
328 return nfs_cache_register(&nfs_dns_resolve);
329}
330
331void nfs_dns_resolver_destroy(void)
332{
333 nfs_cache_unregister(&nfs_dns_resolve);
334}
335
diff --git a/fs/nfs/dns_resolve.h b/fs/nfs/dns_resolve.h
new file mode 100644
index 000000000000..a3f0938babf7
--- /dev/null
+++ b/fs/nfs/dns_resolve.h
@@ -0,0 +1,14 @@
1/*
2 * Resolve DNS hostnames into valid ip addresses
3 */
4#ifndef __LINUX_FS_NFS_DNS_RESOLVE_H
5#define __LINUX_FS_NFS_DNS_RESOLVE_H
6
7#define NFS_DNS_HOSTNAME_MAXLEN (128)
8
9extern int nfs_dns_resolver_init(void);
10extern void nfs_dns_resolver_destroy(void);
11extern ssize_t nfs_dns_resolve_name(char *name, size_t namelen,
12 struct sockaddr *sa, size_t salen);
13
14#endif
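
Callers of this interface pass in a sockaddr buffer and treat the return value as the address length on success, -ESRCH when the name does not resolve, and -EOVERFLOW when the buffer is too small. Below is a hedged sketch of such a caller; the wrapper function and its length check are assumptions made for illustration, while nfs_dns_resolve_name() and rpc_set_port() are interfaces appearing elsewhere in this series.

/* Illustrative sketch only -- not code from this patch. */
static int example_resolve(char *hostname, size_t len,
                           struct sockaddr_storage *ss)
{
        ssize_t salen;

        if (len > NFS_DNS_HOSTNAME_MAXLEN)
                return -ENAMETOOLONG;

        salen = nfs_dns_resolve_name(hostname, len,
                                     (struct sockaddr *)ss, sizeof(*ss));
        if (salen < 0)
                return salen;           /* -ESRCH, -EOVERFLOW, ... */

        rpc_set_port((struct sockaddr *)ss, NFS_PORT);
        return 0;
}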
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 05062329b678..5021b75d2d1e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -328,6 +328,42 @@ nfs_file_fsync(struct file *file, struct dentry *dentry, int datasync)
328} 328}
329 329
330/* 330/*
331 * Decide whether a read/modify/write cycle may be more efficient
 332 * than a modify/write/read cycle when writing to a page in the
333 * page cache.
334 *
335 * The modify/write/read cycle may occur if a page is read before
336 * being completely filled by the writer. In this situation, the
337 * page must be completely written to stable storage on the server
338 * before it can be refilled by reading in the page from the server.
339 * This can lead to expensive, small, FILE_SYNC mode writes being
340 * done.
341 *
342 * It may be more efficient to read the page first if the file is
343 * open for reading in addition to writing, the page is not marked
344 * as Uptodate, it is not dirty or waiting to be committed,
345 * indicating that it was previously allocated and then modified,
346 * that there were valid bytes of data in that range of the file,
347 * and that the new data won't completely replace the old data in
348 * that range of the file.
349 */
350static int nfs_want_read_modify_write(struct file *file, struct page *page,
351 loff_t pos, unsigned len)
352{
353 unsigned int pglen = nfs_page_length(page);
354 unsigned int offset = pos & (PAGE_CACHE_SIZE - 1);
355 unsigned int end = offset + len;
356
357 if ((file->f_mode & FMODE_READ) && /* open for read? */
358 !PageUptodate(page) && /* Uptodate? */
359 !PagePrivate(page) && /* i/o request already? */
360 pglen && /* valid bytes of file? */
361 (end < pglen || offset)) /* replace all valid bytes? */
362 return 1;
363 return 0;
364}
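
The predicate above triggers the extra read only when every condition in the comment holds. The toy below restates the same checks over plain values so the heuristic can be exercised outside the kernel; the page size constant and the boolean parameters are stand-ins for the real page-cache state, not the kernel API.

#include <stdbool.h>
#include <stdio.h>

#define TOY_PAGE_SIZE 4096u

/* Same decision as nfs_want_read_modify_write(), over plain values. */
static bool want_read_modify_write(bool open_for_read, bool uptodate,
                                   bool has_io_request, unsigned int pglen,
                                   unsigned long long pos, unsigned int len)
{
        unsigned int offset = pos & (TOY_PAGE_SIZE - 1);
        unsigned int end = offset + len;

        return open_for_read &&         /* a reader may fault the page in */
               !uptodate &&             /* page contents not already read */
               !has_io_request &&       /* no write/commit outstanding */
               pglen &&                 /* file has valid bytes in this page */
               (end < pglen || offset); /* write does not cover them all */
}

int main(void)
{
        /* Partial overwrite of a fully valid page: read it first. */
        printf("%d\n", want_read_modify_write(true, false, false, 4096, 100, 200));
        /* Complete overwrite of the page: reading first gains nothing. */
        printf("%d\n", want_read_modify_write(true, false, false, 4096, 0, 4096));
        return 0;
}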
365
366/*
331 * This does the "real" work of the write. We must allocate and lock the 367 * This does the "real" work of the write. We must allocate and lock the
332 * page to be sent back to the generic routine, which then copies the 368 * page to be sent back to the generic routine, which then copies the
333 * data from user space. 369 * data from user space.
@@ -340,15 +376,16 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
340 struct page **pagep, void **fsdata) 376 struct page **pagep, void **fsdata)
341{ 377{
342 int ret; 378 int ret;
343 pgoff_t index; 379 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
344 struct page *page; 380 struct page *page;
345 index = pos >> PAGE_CACHE_SHIFT; 381 int once_thru = 0;
346 382
347 dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n", 383 dfprintk(PAGECACHE, "NFS: write_begin(%s/%s(%ld), %u@%lld)\n",
348 file->f_path.dentry->d_parent->d_name.name, 384 file->f_path.dentry->d_parent->d_name.name,
349 file->f_path.dentry->d_name.name, 385 file->f_path.dentry->d_name.name,
350 mapping->host->i_ino, len, (long long) pos); 386 mapping->host->i_ino, len, (long long) pos);
351 387
388start:
352 /* 389 /*
353 * Prevent starvation issues if someone is doing a consistency 390 * Prevent starvation issues if someone is doing a consistency
354 * sync-to-disk 391 * sync-to-disk
@@ -367,6 +404,13 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
367 if (ret) { 404 if (ret) {
368 unlock_page(page); 405 unlock_page(page);
369 page_cache_release(page); 406 page_cache_release(page);
407 } else if (!once_thru &&
408 nfs_want_read_modify_write(file, page, pos, len)) {
409 once_thru = 1;
410 ret = nfs_readpage(file, page);
411 page_cache_release(page);
412 if (!ret)
413 goto start;
370 } 414 }
371 return ret; 415 return ret;
372} 416}
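
The once_thru flag above guarantees that at most one synchronous readpage is issued before the page is grabbed again, so the function cannot loop. A minimal sketch of that retry-once shape with invented helper names (prepare_page, read_page):

#include <stdio.h>

static int prepare_page(void)        { return 0; }  /* grab and lock the page */
static int read_page(void)           { return 0; }  /* 0 when the read succeeds */
static int wants_read(int once_thru) { return !once_thru; }

static int write_begin(void)
{
        int once_thru = 0;
        int ret;

start:
        ret = prepare_page();
        if (ret == 0 && wants_read(once_thru)) {
                once_thru = 1;
                ret = read_page();
                if (ret == 0)
                        goto start;     /* page is now up to date; grab it again */
        }
        return ret;
}

int main(void)
{
        printf("write_begin -> %d\n", write_begin());
        return 0;
}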
@@ -479,6 +523,7 @@ const struct address_space_operations nfs_file_aops = {
479 .invalidatepage = nfs_invalidate_page, 523 .invalidatepage = nfs_invalidate_page,
480 .releasepage = nfs_release_page, 524 .releasepage = nfs_release_page,
481 .direct_IO = nfs_direct_IO, 525 .direct_IO = nfs_direct_IO,
526 .migratepage = nfs_migrate_page,
482 .launder_page = nfs_launder_page, 527 .launder_page = nfs_launder_page,
483}; 528};
484 529
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 86147b0ab2cf..21a84d45916f 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -101,7 +101,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
101 101
102static unsigned int fnvhash32(const void *, size_t); 102static unsigned int fnvhash32(const void *, size_t);
103 103
104static struct rpc_pipe_ops idmap_upcall_ops = { 104static const struct rpc_pipe_ops idmap_upcall_ops = {
105 .upcall = idmap_pipe_upcall, 105 .upcall = idmap_pipe_upcall,
106 .downcall = idmap_pipe_downcall, 106 .downcall = idmap_pipe_downcall,
107 .destroy_msg = idmap_pipe_destroy_msg, 107 .destroy_msg = idmap_pipe_destroy_msg,
@@ -119,8 +119,8 @@ nfs_idmap_new(struct nfs_client *clp)
119 if (idmap == NULL) 119 if (idmap == NULL)
120 return -ENOMEM; 120 return -ENOMEM;
121 121
122 idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", 122 idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry,
123 idmap, &idmap_upcall_ops, 0); 123 "idmap", idmap, &idmap_upcall_ops, 0);
124 if (IS_ERR(idmap->idmap_dentry)) { 124 if (IS_ERR(idmap->idmap_dentry)) {
125 error = PTR_ERR(idmap->idmap_dentry); 125 error = PTR_ERR(idmap->idmap_dentry);
126 kfree(idmap); 126 kfree(idmap);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd7938eda6a8..060022b4651c 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -46,6 +46,7 @@
46#include "iostat.h" 46#include "iostat.h"
47#include "internal.h" 47#include "internal.h"
48#include "fscache.h" 48#include "fscache.h"
49#include "dns_resolve.h"
49 50
50#define NFSDBG_FACILITY NFSDBG_VFS 51#define NFSDBG_FACILITY NFSDBG_VFS
51 52
@@ -286,6 +287,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
286 /* We can't support update_atime(), since the server will reset it */ 287 /* We can't support update_atime(), since the server will reset it */
287 inode->i_flags |= S_NOATIME|S_NOCMTIME; 288 inode->i_flags |= S_NOATIME|S_NOCMTIME;
288 inode->i_mode = fattr->mode; 289 inode->i_mode = fattr->mode;
290 if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
291 && nfs_server_capable(inode, NFS_CAP_MODE))
292 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
293 | NFS_INO_INVALID_ACCESS
294 | NFS_INO_INVALID_ACL;
289 /* Why so? Because we want revalidate for devices/FIFOs, and 295 /* Why so? Because we want revalidate for devices/FIFOs, and
290 * that's precisely what we have in nfs_file_inode_operations. 296 * that's precisely what we have in nfs_file_inode_operations.
291 */ 297 */
@@ -330,20 +336,46 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
330 nfsi->attr_gencount = fattr->gencount; 336 nfsi->attr_gencount = fattr->gencount;
331 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 337 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
332 inode->i_atime = fattr->atime; 338 inode->i_atime = fattr->atime;
339 else if (nfs_server_capable(inode, NFS_CAP_ATIME))
340 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
333 if (fattr->valid & NFS_ATTR_FATTR_MTIME) 341 if (fattr->valid & NFS_ATTR_FATTR_MTIME)
334 inode->i_mtime = fattr->mtime; 342 inode->i_mtime = fattr->mtime;
343 else if (nfs_server_capable(inode, NFS_CAP_MTIME))
344 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
345 | NFS_INO_INVALID_DATA;
335 if (fattr->valid & NFS_ATTR_FATTR_CTIME) 346 if (fattr->valid & NFS_ATTR_FATTR_CTIME)
336 inode->i_ctime = fattr->ctime; 347 inode->i_ctime = fattr->ctime;
348 else if (nfs_server_capable(inode, NFS_CAP_CTIME))
349 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
350 | NFS_INO_INVALID_ACCESS
351 | NFS_INO_INVALID_ACL;
337 if (fattr->valid & NFS_ATTR_FATTR_CHANGE) 352 if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
338 nfsi->change_attr = fattr->change_attr; 353 nfsi->change_attr = fattr->change_attr;
354 else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR))
355 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
356 | NFS_INO_INVALID_DATA;
339 if (fattr->valid & NFS_ATTR_FATTR_SIZE) 357 if (fattr->valid & NFS_ATTR_FATTR_SIZE)
340 inode->i_size = nfs_size_to_loff_t(fattr->size); 358 inode->i_size = nfs_size_to_loff_t(fattr->size);
359 else
360 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
361 | NFS_INO_INVALID_DATA
362 | NFS_INO_REVAL_PAGECACHE;
341 if (fattr->valid & NFS_ATTR_FATTR_NLINK) 363 if (fattr->valid & NFS_ATTR_FATTR_NLINK)
342 inode->i_nlink = fattr->nlink; 364 inode->i_nlink = fattr->nlink;
365 else if (nfs_server_capable(inode, NFS_CAP_NLINK))
366 nfsi->cache_validity |= NFS_INO_INVALID_ATTR;
343 if (fattr->valid & NFS_ATTR_FATTR_OWNER) 367 if (fattr->valid & NFS_ATTR_FATTR_OWNER)
344 inode->i_uid = fattr->uid; 368 inode->i_uid = fattr->uid;
369 else if (nfs_server_capable(inode, NFS_CAP_OWNER))
370 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
371 | NFS_INO_INVALID_ACCESS
372 | NFS_INO_INVALID_ACL;
345 if (fattr->valid & NFS_ATTR_FATTR_GROUP) 373 if (fattr->valid & NFS_ATTR_FATTR_GROUP)
346 inode->i_gid = fattr->gid; 374 inode->i_gid = fattr->gid;
375 else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
376 nfsi->cache_validity |= NFS_INO_INVALID_ATTR
377 | NFS_INO_INVALID_ACCESS
378 | NFS_INO_INVALID_ACL;
347 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) 379 if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
348 inode->i_blocks = fattr->du.nfs2.blocks; 380 inode->i_blocks = fattr->du.nfs2.blocks;
349 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 381 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
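
The pattern repeated in this hunk is: when the server is capable of returning an attribute but this particular reply omitted it, record that the cached value still needs revalidation by setting the matching NFS_INO_* bits. A compact stand-alone illustration of that rule, using bit names local to the example rather than the kernel's definitions:

#include <stdio.h>

#define ATTR_MTIME      0x01    /* reply carried mtime */
#define CAP_MTIME       0x01    /* server can return mtime */
#define INVALID_ATTR    0x01
#define INVALID_DATA    0x02

static unsigned long update_validity(unsigned int valid, unsigned int caps,
                                     unsigned long cache_validity)
{
        /* Attribute missing although the server supports it: flag it. */
        if (!(valid & ATTR_MTIME) && (caps & CAP_MTIME))
                cache_validity |= INVALID_ATTR | INVALID_DATA;
        return cache_validity;
}

int main(void)
{
        printf("0x%lx\n", update_validity(0, CAP_MTIME, 0)); /* needs revalidation */
        printf("0x%lx\n", update_validity(0, 0, 0));         /* server can't say more */
        return 0;
}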
@@ -1145,6 +1177,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1145 loff_t cur_isize, new_isize; 1177 loff_t cur_isize, new_isize;
1146 unsigned long invalid = 0; 1178 unsigned long invalid = 0;
1147 unsigned long now = jiffies; 1179 unsigned long now = jiffies;
1180 unsigned long save_cache_validity;
1148 1181
1149 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", 1182 dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
1150 __func__, inode->i_sb->s_id, inode->i_ino, 1183 __func__, inode->i_sb->s_id, inode->i_ino,
@@ -1171,10 +1204,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1171 */ 1204 */
1172 nfsi->read_cache_jiffies = fattr->time_start; 1205 nfsi->read_cache_jiffies = fattr->time_start;
1173 1206
1174 if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME))) 1207 save_cache_validity = nfsi->cache_validity;
1175 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR 1208 nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
1176 | NFS_INO_INVALID_ATIME 1209 | NFS_INO_INVALID_ATIME
1177 | NFS_INO_REVAL_PAGECACHE); 1210 | NFS_INO_REVAL_FORCED
1211 | NFS_INO_REVAL_PAGECACHE);
1178 1212
1179 /* Do atomic weak cache consistency updates */ 1213 /* Do atomic weak cache consistency updates */
1180 nfs_wcc_update_inode(inode, fattr); 1214 nfs_wcc_update_inode(inode, fattr);
@@ -1189,7 +1223,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1189 nfs_force_lookup_revalidate(inode); 1223 nfs_force_lookup_revalidate(inode);
1190 nfsi->change_attr = fattr->change_attr; 1224 nfsi->change_attr = fattr->change_attr;
1191 } 1225 }
1192 } 1226 } else if (server->caps & NFS_CAP_CHANGE_ATTR)
1227 invalid |= save_cache_validity;
1193 1228
1194 if (fattr->valid & NFS_ATTR_FATTR_MTIME) { 1229 if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
1195 /* NFSv2/v3: Check if the mtime agrees */ 1230 /* NFSv2/v3: Check if the mtime agrees */
@@ -1201,7 +1236,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1201 nfs_force_lookup_revalidate(inode); 1236 nfs_force_lookup_revalidate(inode);
1202 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); 1237 memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
1203 } 1238 }
1204 } 1239 } else if (server->caps & NFS_CAP_MTIME)
1240 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1241 | NFS_INO_INVALID_DATA
1242 | NFS_INO_REVAL_PAGECACHE
1243 | NFS_INO_REVAL_FORCED);
1244
1205 if (fattr->valid & NFS_ATTR_FATTR_CTIME) { 1245 if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
1206 /* If ctime has changed we should definitely clear access+acl caches */ 1246 /* If ctime has changed we should definitely clear access+acl caches */
1207 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { 1247 if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
@@ -1215,7 +1255,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1215 } 1255 }
1216 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); 1256 memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
1217 } 1257 }
1218 } 1258 } else if (server->caps & NFS_CAP_CTIME)
1259 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1260 | NFS_INO_INVALID_ACCESS
1261 | NFS_INO_INVALID_ACL
1262 | NFS_INO_REVAL_FORCED);
1219 1263
1220 /* Check if our cached file size is stale */ 1264 /* Check if our cached file size is stale */
1221 if (fattr->valid & NFS_ATTR_FATTR_SIZE) { 1265 if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
@@ -1231,30 +1275,50 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1231 dprintk("NFS: isize change on server for file %s/%ld\n", 1275 dprintk("NFS: isize change on server for file %s/%ld\n",
1232 inode->i_sb->s_id, inode->i_ino); 1276 inode->i_sb->s_id, inode->i_ino);
1233 } 1277 }
1234 } 1278 } else
1279 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1280 | NFS_INO_REVAL_PAGECACHE
1281 | NFS_INO_REVAL_FORCED);
1235 1282
1236 1283
1237 if (fattr->valid & NFS_ATTR_FATTR_ATIME) 1284 if (fattr->valid & NFS_ATTR_FATTR_ATIME)
1238 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); 1285 memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
1286 else if (server->caps & NFS_CAP_ATIME)
1287 invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
1288 | NFS_INO_REVAL_FORCED);
1239 1289
1240 if (fattr->valid & NFS_ATTR_FATTR_MODE) { 1290 if (fattr->valid & NFS_ATTR_FATTR_MODE) {
1241 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { 1291 if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
1242 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1292 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1243 inode->i_mode = fattr->mode; 1293 inode->i_mode = fattr->mode;
1244 } 1294 }
1245 } 1295 } else if (server->caps & NFS_CAP_MODE)
1296 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1297 | NFS_INO_INVALID_ACCESS
1298 | NFS_INO_INVALID_ACL
1299 | NFS_INO_REVAL_FORCED);
1300
1246 if (fattr->valid & NFS_ATTR_FATTR_OWNER) { 1301 if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
1247 if (inode->i_uid != fattr->uid) { 1302 if (inode->i_uid != fattr->uid) {
1248 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1303 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1249 inode->i_uid = fattr->uid; 1304 inode->i_uid = fattr->uid;
1250 } 1305 }
1251 } 1306 } else if (server->caps & NFS_CAP_OWNER)
1307 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1308 | NFS_INO_INVALID_ACCESS
1309 | NFS_INO_INVALID_ACL
1310 | NFS_INO_REVAL_FORCED);
1311
1252 if (fattr->valid & NFS_ATTR_FATTR_GROUP) { 1312 if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
1253 if (inode->i_gid != fattr->gid) { 1313 if (inode->i_gid != fattr->gid) {
1254 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; 1314 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
1255 inode->i_gid = fattr->gid; 1315 inode->i_gid = fattr->gid;
1256 } 1316 }
1257 } 1317 } else if (server->caps & NFS_CAP_OWNER_GROUP)
1318 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1319 | NFS_INO_INVALID_ACCESS
1320 | NFS_INO_INVALID_ACL
1321 | NFS_INO_REVAL_FORCED);
1258 1322
1259 if (fattr->valid & NFS_ATTR_FATTR_NLINK) { 1323 if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
1260 if (inode->i_nlink != fattr->nlink) { 1324 if (inode->i_nlink != fattr->nlink) {
@@ -1263,7 +1327,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1263 invalid |= NFS_INO_INVALID_DATA; 1327 invalid |= NFS_INO_INVALID_DATA;
1264 inode->i_nlink = fattr->nlink; 1328 inode->i_nlink = fattr->nlink;
1265 } 1329 }
1266 } 1330 } else if (server->caps & NFS_CAP_NLINK)
1331 invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
1332 | NFS_INO_REVAL_FORCED);
1267 1333
1268 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { 1334 if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
1269 /* 1335 /*
@@ -1293,9 +1359,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
1293 || S_ISLNK(inode->i_mode))) 1359 || S_ISLNK(inode->i_mode)))
1294 invalid &= ~NFS_INO_INVALID_DATA; 1360 invalid &= ~NFS_INO_INVALID_DATA;
1295 if (!nfs_have_delegation(inode, FMODE_READ) || 1361 if (!nfs_have_delegation(inode, FMODE_READ) ||
1296 (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) 1362 (save_cache_validity & NFS_INO_REVAL_FORCED))
1297 nfsi->cache_validity |= invalid; 1363 nfsi->cache_validity |= invalid;
1298 nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
1299 1364
1300 return 0; 1365 return 0;
1301 out_changed: 1366 out_changed:
@@ -1442,6 +1507,10 @@ static int __init init_nfs_fs(void)
1442{ 1507{
1443 int err; 1508 int err;
1444 1509
1510 err = nfs_dns_resolver_init();
1511 if (err < 0)
1512 goto out8;
1513
1445 err = nfs_fscache_register(); 1514 err = nfs_fscache_register();
1446 if (err < 0) 1515 if (err < 0)
1447 goto out7; 1516 goto out7;
@@ -1500,6 +1569,8 @@ out5:
1500out6: 1569out6:
1501 nfs_fscache_unregister(); 1570 nfs_fscache_unregister();
1502out7: 1571out7:
1572 nfs_dns_resolver_destroy();
1573out8:
1503 return err; 1574 return err;
1504} 1575}
1505 1576
@@ -1511,6 +1582,7 @@ static void __exit exit_nfs_fs(void)
1511 nfs_destroy_inodecache(); 1582 nfs_destroy_inodecache();
1512 nfs_destroy_nfspagecache(); 1583 nfs_destroy_nfspagecache();
1513 nfs_fscache_unregister(); 1584 nfs_fscache_unregister();
1585 nfs_dns_resolver_destroy();
1514#ifdef CONFIG_PROC_FS 1586#ifdef CONFIG_PROC_FS
1515 rpc_proc_unregister("nfs"); 1587 rpc_proc_unregister("nfs");
1516#endif 1588#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7dd90a6769d0..e21b1bb9972f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -49,6 +49,11 @@ struct nfs_clone_mount {
49#define NFS_MAX_SECFLAVORS (12) 49#define NFS_MAX_SECFLAVORS (12)
50 50
51/* 51/*
52 * Value used if the user did not specify a port value.
53 */
54#define NFS_UNSPEC_PORT (-1)
55
56/*
52 * In-kernel mount arguments 57 * In-kernel mount arguments
53 */ 58 */
54struct nfs_parsed_mount_data { 59struct nfs_parsed_mount_data {
@@ -63,6 +68,7 @@ struct nfs_parsed_mount_data {
63 unsigned int auth_flavor_len; 68 unsigned int auth_flavor_len;
64 rpc_authflavor_t auth_flavors[1]; 69 rpc_authflavor_t auth_flavors[1];
65 char *client_address; 70 char *client_address;
71 unsigned int version;
66 unsigned int minorversion; 72 unsigned int minorversion;
67 char *fscache_uniq; 73 char *fscache_uniq;
68 74
@@ -71,7 +77,7 @@ struct nfs_parsed_mount_data {
71 size_t addrlen; 77 size_t addrlen;
72 char *hostname; 78 char *hostname;
73 u32 version; 79 u32 version;
74 unsigned short port; 80 int port;
75 unsigned short protocol; 81 unsigned short protocol;
76 } mount_server; 82 } mount_server;
77 83
@@ -80,7 +86,7 @@ struct nfs_parsed_mount_data {
80 size_t addrlen; 86 size_t addrlen;
81 char *hostname; 87 char *hostname;
82 char *export_path; 88 char *export_path;
83 unsigned short port; 89 int port;
84 unsigned short protocol; 90 unsigned short protocol;
85 } nfs_server; 91 } nfs_server;
86 92
@@ -102,6 +108,7 @@ struct nfs_mount_request {
102}; 108};
103 109
104extern int nfs_mount(struct nfs_mount_request *info); 110extern int nfs_mount(struct nfs_mount_request *info);
111extern void nfs_umount(const struct nfs_mount_request *info);
105 112
106/* client.c */ 113/* client.c */
107extern struct rpc_program nfs_program; 114extern struct rpc_program nfs_program;
@@ -213,7 +220,6 @@ void nfs_zap_acl_cache(struct inode *inode);
213extern int nfs_wait_bit_killable(void *word); 220extern int nfs_wait_bit_killable(void *word);
214 221
215/* super.c */ 222/* super.c */
216void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
217extern struct file_system_type nfs_xdev_fs_type; 223extern struct file_system_type nfs_xdev_fs_type;
218#ifdef CONFIG_NFS_V4 224#ifdef CONFIG_NFS_V4
219extern struct file_system_type nfs4_xdev_fs_type; 225extern struct file_system_type nfs4_xdev_fs_type;
@@ -248,6 +254,12 @@ extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
248 254
249/* write.c */ 255/* write.c */
250extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 256extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
257#ifdef CONFIG_MIGRATION
258extern int nfs_migrate_page(struct address_space *,
259 struct page *, struct page *);
260#else
261#define nfs_migrate_page NULL
262#endif
251 263
252/* nfs4proc.c */ 264/* nfs4proc.c */
253extern int _nfs4_call_sync(struct nfs_server *server, 265extern int _nfs4_call_sync(struct nfs_server *server,
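
The nfs_migrate_page declaration uses a common idiom: when the optional feature is compiled out, the name is defined to NULL so an ops-table initializer (here the .migratepage slot added to nfs_file_aops) still compiles unchanged. A small illustration of the idiom with invented names (FEATURE_FOO, foo_handler):

#include <stddef.h>

#ifdef FEATURE_FOO
extern int foo_handler(int arg);
#else
#define foo_handler NULL        /* slot stays syntactically valid, just unused */
#endif

struct toy_ops {
        int (*handler)(int arg);
};

static const struct toy_ops ops = {
        .handler = foo_handler, /* a real function or NULL, either way it builds */
};

int main(void)
{
        return ops.handler ? ops.handler(0) : 0;
}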
@@ -368,24 +380,3 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
368 return ((unsigned long)len + (unsigned long)base + 380 return ((unsigned long)len + (unsigned long)base +
369 PAGE_SIZE - 1) >> PAGE_SHIFT; 381 PAGE_SIZE - 1) >> PAGE_SHIFT;
370} 382}
371
372#define IPV6_SCOPE_DELIMITER '%'
373
374/*
375 * Set the port number in an address. Be agnostic about the address
376 * family.
377 */
378static inline void nfs_set_port(struct sockaddr *sap, unsigned short port)
379{
380 struct sockaddr_in *ap = (struct sockaddr_in *)sap;
381 struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap;
382
383 switch (sap->sa_family) {
384 case AF_INET:
385 ap->sin_port = htons(port);
386 break;
387 case AF_INET6:
388 ap6->sin6_port = htons(port);
389 break;
390 }
391}
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 38ef9eaec407..0adefc40cc89 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -209,6 +209,71 @@ out_mnt_err:
209 goto out; 209 goto out;
210} 210}
211 211
212/**
213 * nfs_umount - Notify a server that we have unmounted this export
214 * @info: pointer to umount request arguments
215 *
216 * MOUNTPROC_UMNT is advisory, so we set a short timeout, and always
217 * use UDP.
218 */
219void nfs_umount(const struct nfs_mount_request *info)
220{
221 static const struct rpc_timeout nfs_umnt_timeout = {
222 .to_initval = 1 * HZ,
223 .to_maxval = 3 * HZ,
224 .to_retries = 2,
225 };
226 struct rpc_create_args args = {
227 .protocol = IPPROTO_UDP,
228 .address = info->sap,
229 .addrsize = info->salen,
230 .timeout = &nfs_umnt_timeout,
231 .servername = info->hostname,
232 .program = &mnt_program,
233 .version = info->version,
234 .authflavor = RPC_AUTH_UNIX,
235 .flags = RPC_CLNT_CREATE_NOPING,
236 };
237 struct mountres result;
238 struct rpc_message msg = {
239 .rpc_argp = info->dirpath,
240 .rpc_resp = &result,
241 };
242 struct rpc_clnt *clnt;
243 int status;
244
245 if (info->noresvport)
246 args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
247
248 clnt = rpc_create(&args);
249 if (unlikely(IS_ERR(clnt)))
250 goto out_clnt_err;
251
252 dprintk("NFS: sending UMNT request for %s:%s\n",
253 (info->hostname ? info->hostname : "server"), info->dirpath);
254
255 if (info->version == NFS_MNT3_VERSION)
256 msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC3_UMNT];
257 else
258 msg.rpc_proc = &clnt->cl_procinfo[MOUNTPROC_UMNT];
259
260 status = rpc_call_sync(clnt, &msg, 0);
261 rpc_shutdown_client(clnt);
262
263 if (unlikely(status < 0))
264 goto out_call_err;
265
266 return;
267
268out_clnt_err:
269 dprintk("NFS: failed to create UMNT RPC client, status=%ld\n",
270 PTR_ERR(clnt));
271 return;
272
273out_call_err:
274 dprintk("NFS: UMNT request failed, status=%d\n", status);
275}
276
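
Callers treat nfs_umount() as fire and forget: it returns void and only logs failures through dprintk(). A hedged sketch of how an unmount path might invoke it is shown below; the wrapper function and its parameters are assumptions made for illustration, and only the nfs_mount_request fields actually read above are filled in.

/* Illustrative sketch only -- not taken from this patch. */
static void example_send_umount(struct sockaddr *sap, size_t salen,
                                char *hostname, char *dirpath,
                                int version, int noresvport)
{
        struct nfs_mount_request info = {
                .sap            = sap,
                .salen          = salen,
                .hostname       = hostname,
                .dirpath        = dirpath,
                .version        = version,     /* e.g. NFS_MNT3_VERSION */
                .noresvport     = noresvport,
        };

        nfs_umount(&info);      /* advisory; any error is only logged */
}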
212/* 277/*
213 * XDR encode/decode functions for MOUNT 278 * XDR encode/decode functions for MOUNT
214 */ 279 */
@@ -258,7 +323,7 @@ static int decode_status(struct xdr_stream *xdr, struct mountres *res)
258 return -EIO; 323 return -EIO;
259 status = ntohl(*p); 324 status = ntohl(*p);
260 325
261 for (i = 0; i <= ARRAY_SIZE(mnt_errtbl); i++) { 326 for (i = 0; i < ARRAY_SIZE(mnt_errtbl); i++) {
262 if (mnt_errtbl[i].status == status) { 327 if (mnt_errtbl[i].status == status) {
263 res->errno = mnt_errtbl[i].errno; 328 res->errno = mnt_errtbl[i].errno;
264 return 0; 329 return 0;
@@ -309,7 +374,7 @@ static int decode_fhs_status(struct xdr_stream *xdr, struct mountres *res)
309 return -EIO; 374 return -EIO;
310 status = ntohl(*p); 375 status = ntohl(*p);
311 376
312 for (i = 0; i <= ARRAY_SIZE(mnt3_errtbl); i++) { 377 for (i = 0; i < ARRAY_SIZE(mnt3_errtbl); i++) {
313 if (mnt3_errtbl[i].status == status) { 378 if (mnt3_errtbl[i].status == status) {
314 res->errno = mnt3_errtbl[i].errno; 379 res->errno = mnt3_errtbl[i].errno;
315 return 0; 380 return 0;
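
Both loop fixes above replace "<=" with "<": with i <= ARRAY_SIZE(tbl) the final iteration reads one entry past the end of the error table. A stand-alone demonstration of why the strict bound is the correct one:

#include <stddef.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const int tbl[] = { 10, 20, 30 };

int main(void)
{
        size_t i;

        /* Correct: visits indexes 0, 1 and 2 only. */
        for (i = 0; i < ARRAY_SIZE(tbl); i++)
                printf("tbl[%zu] = %d\n", i, tbl[i]);

        /*
         * The buggy form, "i <= ARRAY_SIZE(tbl)", would also dereference
         * tbl[3], which is outside the array.
         */
        return 0;
}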
@@ -407,6 +472,13 @@ static struct rpc_procinfo mnt_procedures[] = {
407 .p_statidx = MOUNTPROC_MNT, 472 .p_statidx = MOUNTPROC_MNT,
408 .p_name = "MOUNT", 473 .p_name = "MOUNT",
409 }, 474 },
475 [MOUNTPROC_UMNT] = {
476 .p_proc = MOUNTPROC_UMNT,
477 .p_encode = (kxdrproc_t)mnt_enc_dirpath,
478 .p_arglen = MNT_enc_dirpath_sz,
479 .p_statidx = MOUNTPROC_UMNT,
480 .p_name = "UMOUNT",
481 },
410}; 482};
411 483
412static struct rpc_procinfo mnt3_procedures[] = { 484static struct rpc_procinfo mnt3_procedures[] = {
@@ -419,6 +491,13 @@ static struct rpc_procinfo mnt3_procedures[] = {
419 .p_statidx = MOUNTPROC3_MNT, 491 .p_statidx = MOUNTPROC3_MNT,
420 .p_name = "MOUNT", 492 .p_name = "MOUNT",
421 }, 493 },
494 [MOUNTPROC3_UMNT] = {
495 .p_proc = MOUNTPROC3_UMNT,
496 .p_encode = (kxdrproc_t)mnt_enc_dirpath,
497 .p_arglen = MNT_enc_dirpath_sz,
498 .p_statidx = MOUNTPROC3_UMNT,
499 .p_name = "UMOUNT",
500 },
422}; 501};
423 502
424 503
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index d0cc5ce0edfe..ee6a13f05443 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -299,7 +299,6 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
299 299
300/* 300/*
301 * Create a regular file. 301 * Create a regular file.
302 * For now, we don't implement O_EXCL.
303 */ 302 */
304static int 303static int
305nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 304nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 2a2a0a7143ad..2636c26d56fa 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -17,6 +17,7 @@
17#include <linux/inet.h> 17#include <linux/inet.h>
18#include "internal.h" 18#include "internal.h"
19#include "nfs4_fs.h" 19#include "nfs4_fs.h"
20#include "dns_resolve.h"
20 21
21#define NFSDBG_FACILITY NFSDBG_VFS 22#define NFSDBG_FACILITY NFSDBG_VFS
22 23
@@ -95,6 +96,20 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
95 return 0; 96 return 0;
96} 97}
97 98
99static size_t nfs_parse_server_name(char *string, size_t len,
100 struct sockaddr *sa, size_t salen)
101{
102 ssize_t ret;
103
104 ret = rpc_pton(string, len, sa, salen);
105 if (ret == 0) {
106 ret = nfs_dns_resolve_name(string, len, sa, salen);
107 if (ret < 0)
108 ret = 0;
109 }
110 return ret;
111}
112
98static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, 113static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
99 char *page, char *page2, 114 char *page, char *page2,
100 const struct nfs4_fs_location *location) 115 const struct nfs4_fs_location *location)
@@ -121,11 +136,12 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
121 136
122 if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) 137 if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len))
123 continue; 138 continue;
124 nfs_parse_ip_address(buf->data, buf->len, 139 mountdata->addrlen = nfs_parse_server_name(buf->data,
125 mountdata->addr, &mountdata->addrlen); 140 buf->len,
126 if (mountdata->addr->sa_family == AF_UNSPEC) 141 mountdata->addr, mountdata->addrlen);
142 if (mountdata->addrlen == 0)
127 continue; 143 continue;
128 nfs_set_port(mountdata->addr, NFS_PORT); 144 rpc_set_port(mountdata->addr, NFS_PORT);
129 145
130 memcpy(page2, buf->data, buf->len); 146 memcpy(page2, buf->data, buf->len);
131 page2[buf->len] = '\0'; 147 page2[buf->len] = '\0';
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6917311f201c..be6544aef41f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -61,6 +61,8 @@
61#define NFS4_POLL_RETRY_MIN (HZ/10) 61#define NFS4_POLL_RETRY_MIN (HZ/10)
62#define NFS4_POLL_RETRY_MAX (15*HZ) 62#define NFS4_POLL_RETRY_MAX (15*HZ)
63 63
64#define NFS4_MAX_LOOP_ON_RECOVER (10)
65
64struct nfs4_opendata; 66struct nfs4_opendata;
65static int _nfs4_proc_open(struct nfs4_opendata *data); 67static int _nfs4_proc_open(struct nfs4_opendata *data);
66static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 68static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
@@ -426,17 +428,19 @@ out:
426static int nfs4_recover_session(struct nfs4_session *session) 428static int nfs4_recover_session(struct nfs4_session *session)
427{ 429{
428 struct nfs_client *clp = session->clp; 430 struct nfs_client *clp = session->clp;
431 unsigned int loop;
429 int ret; 432 int ret;
430 433
431 for (;;) { 434 for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
432 ret = nfs4_wait_clnt_recover(clp); 435 ret = nfs4_wait_clnt_recover(clp);
433 if (ret != 0) 436 if (ret != 0)
434 return ret; 437 break;
435 if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) 438 if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state))
436 break; 439 break;
437 nfs4_schedule_state_manager(clp); 440 nfs4_schedule_state_manager(clp);
441 ret = -EIO;
438 } 442 }
439 return 0; 443 return ret;
440} 444}
441 445
442static int nfs41_setup_sequence(struct nfs4_session *session, 446static int nfs41_setup_sequence(struct nfs4_session *session,
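
Both recovery helpers drop their unbounded for (;;) loops in favour of a counter capped at NFS4_MAX_LOOP_ON_RECOVER, so a recovery that never converges now surfaces -EIO instead of spinning forever. The same shape in a self-contained toy; the recovery_done() stub stands in for the cl_state bit tests:

#include <errno.h>
#include <stdio.h>

#define MAX_LOOPS 10

static int recovery_done(unsigned int attempt)
{
        return attempt >= 3;    /* pretend recovery settles after a few passes */
}

static int recover(void)
{
        unsigned int loop, attempt = 0;
        int ret = -EIO;         /* reported if the loop never settles */

        for (loop = MAX_LOOPS; loop != 0; loop--) {
                if (recovery_done(attempt++)) {
                        ret = 0;
                        break;
                }
                /* ...kick the state manager and go around again... */
        }
        return ret;
}

int main(void)
{
        printf("recover -> %d\n", recover());
        return 0;
}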
@@ -1444,18 +1448,20 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1444static int nfs4_recover_expired_lease(struct nfs_server *server) 1448static int nfs4_recover_expired_lease(struct nfs_server *server)
1445{ 1449{
1446 struct nfs_client *clp = server->nfs_client; 1450 struct nfs_client *clp = server->nfs_client;
1451 unsigned int loop;
1447 int ret; 1452 int ret;
1448 1453
1449 for (;;) { 1454 for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
1450 ret = nfs4_wait_clnt_recover(clp); 1455 ret = nfs4_wait_clnt_recover(clp);
1451 if (ret != 0) 1456 if (ret != 0)
1452 return ret; 1457 break;
1453 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1458 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1454 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1459 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1455 break; 1460 break;
1456 nfs4_schedule_state_recovery(clp); 1461 nfs4_schedule_state_recovery(clp);
1462 ret = -EIO;
1457 } 1463 }
1458 return 0; 1464 return ret;
1459} 1465}
1460 1466
1461/* 1467/*
@@ -1997,12 +2003,34 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
1997 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2003 status = nfs4_call_sync(server, &msg, &args, &res, 0);
1998 if (status == 0) { 2004 if (status == 0) {
1999 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); 2005 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
2006 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
2007 NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
2008 NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
2009 NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
2010 NFS_CAP_CTIME|NFS_CAP_MTIME);
2000 if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) 2011 if (res.attr_bitmask[0] & FATTR4_WORD0_ACL)
2001 server->caps |= NFS_CAP_ACLS; 2012 server->caps |= NFS_CAP_ACLS;
2002 if (res.has_links != 0) 2013 if (res.has_links != 0)
2003 server->caps |= NFS_CAP_HARDLINKS; 2014 server->caps |= NFS_CAP_HARDLINKS;
2004 if (res.has_symlinks != 0) 2015 if (res.has_symlinks != 0)
2005 server->caps |= NFS_CAP_SYMLINKS; 2016 server->caps |= NFS_CAP_SYMLINKS;
2017 if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID)
2018 server->caps |= NFS_CAP_FILEID;
2019 if (res.attr_bitmask[1] & FATTR4_WORD1_MODE)
2020 server->caps |= NFS_CAP_MODE;
2021 if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)
2022 server->caps |= NFS_CAP_NLINK;
2023 if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER)
2024 server->caps |= NFS_CAP_OWNER;
2025 if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)
2026 server->caps |= NFS_CAP_OWNER_GROUP;
2027 if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)
2028 server->caps |= NFS_CAP_ATIME;
2029 if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)
2030 server->caps |= NFS_CAP_CTIME;
2031 if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
2032 server->caps |= NFS_CAP_MTIME;
2033
2006 memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); 2034 memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
2007 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; 2035 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
2008 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 2036 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
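
_nfs4_server_capabilities() now rebuilds the attribute-related capability bits from scratch: clear them all first, then set each one only when the server's FATTR4 bitmask advertises the attribute. A compact stand-alone version of that clear-then-set pattern, with bit names invented for the example:

#include <stdio.h>

#define CAP_MODE        0x1
#define CAP_OWNER       0x2
#define ATTR_MODE       0x10
#define ATTR_OWNER      0x20

static unsigned int rebuild_caps(unsigned int caps, unsigned int attr_bitmask)
{
        caps &= ~(CAP_MODE | CAP_OWNER);        /* forget any stale answers */
        if (attr_bitmask & ATTR_MODE)
                caps |= CAP_MODE;
        if (attr_bitmask & ATTR_OWNER)
                caps |= CAP_OWNER;
        return caps;
}

int main(void)
{
        /* Server advertises mode but not owner: only CAP_MODE survives. */
        printf("0x%x\n", rebuild_caps(CAP_OWNER, ATTR_MODE));
        return 0;
}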
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 617273e7d47f..cfc30d362f94 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -702,29 +702,12 @@ struct compound_hdr {
702 u32 minorversion; 702 u32 minorversion;
703}; 703};
704 704
705/* 705static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
706 * START OF "GENERIC" ENCODE ROUTINES. 706{
707 * These may look a little ugly since they are imported from a "generic" 707 __be32 *p = xdr_reserve_space(xdr, nbytes);
708 * set of XDR encode/decode routines which are intended to be shared by 708 BUG_ON(!p);
709 * all of our NFSv4 implementations (OpenBSD, MacOS X...). 709 return p;
710 * 710}
711 * If the pain of reading these is too great, it should be a straightforward
712 * task to translate them into Linux-specific versions which are more
713 * consistent with the style used in NFSv2/v3...
714 */
715#define WRITE32(n) *p++ = htonl(n)
716#define WRITE64(n) do { \
717 *p++ = htonl((uint32_t)((n) >> 32)); \
718 *p++ = htonl((uint32_t)(n)); \
719} while (0)
720#define WRITEMEM(ptr,nbytes) do { \
721 p = xdr_encode_opaque_fixed(p, ptr, nbytes); \
722} while (0)
723
724#define RESERVE_SPACE(nbytes) do { \
725 p = xdr_reserve_space(xdr, nbytes); \
726 BUG_ON(!p); \
727} while (0)
728 711
729static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) 712static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
730{ 713{
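
The remainder of this file is a mechanical conversion: instead of macros that hide the cursor variable (RESERVE_SPACE, WRITE32, WRITE64, WRITEMEM), each encoder now takes the __be32 pointer returned by reserve_space() and advances it explicitly with cpu_to_be32(), xdr_encode_hyper() and the xdr_encode_opaque helpers. The toy encoder below shows the new shape on a plain buffer; the buffer handling and helper names are simplified stand-ins, with htonl() playing the role of cpu_to_be32().

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>  /* htonl(), ntohl() */

static uint32_t buf[16];
static uint32_t *cursor = buf;

/* Stand-in for xdr_reserve_space(): hand out the next nbytes of the buffer. */
static uint32_t *reserve_space(size_t nbytes)
{
        uint32_t *p = cursor;

        cursor += nbytes / 4;
        return p;
}

/* Stand-in for xdr_encode_hyper(): a 64-bit value as two big-endian words. */
static uint32_t *encode_hyper(uint32_t *p, uint64_t val)
{
        *p++ = htonl((uint32_t)(val >> 32));
        *p++ = htonl((uint32_t)val);
        return p;
}

/* Mirrors the new encode_commit(): opcode, 64-bit offset, 32-bit count. */
static void encode_commit_like(uint32_t op, uint64_t offset, uint32_t count)
{
        uint32_t *p = reserve_space(16);

        *p++ = htonl(op);               /* previously WRITE32(OP_COMMIT) */
        p = encode_hyper(p, offset);    /* previously WRITE64(args->offset) */
        *p = htonl(count);              /* previously WRITE32(args->count) */
}

int main(void)
{
        encode_commit_like(5, 0x123456789abcULL, 4096);
        printf("opcode word = 0x%08x\n", ntohl(buf[0]));
        return 0;
}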
@@ -749,12 +732,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
749 732
750 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); 733 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
751 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); 734 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
752 RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2)); 735 p = reserve_space(xdr, 4 + hdr->taglen + 8);
753 WRITE32(hdr->taglen); 736 p = xdr_encode_opaque(p, hdr->tag, hdr->taglen);
754 WRITEMEM(hdr->tag, hdr->taglen); 737 *p++ = cpu_to_be32(hdr->minorversion);
755 WRITE32(hdr->minorversion);
756 hdr->nops_p = p; 738 hdr->nops_p = p;
757 WRITE32(hdr->nops); 739 *p = cpu_to_be32(hdr->nops);
758} 740}
759 741
760static void encode_nops(struct compound_hdr *hdr) 742static void encode_nops(struct compound_hdr *hdr)
@@ -829,55 +811,53 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
829 len += 16; 811 len += 16;
830 else if (iap->ia_valid & ATTR_MTIME) 812 else if (iap->ia_valid & ATTR_MTIME)
831 len += 4; 813 len += 4;
832 RESERVE_SPACE(len); 814 p = reserve_space(xdr, len);
833 815
834 /* 816 /*
835 * We write the bitmap length now, but leave the bitmap and the attribute 817 * We write the bitmap length now, but leave the bitmap and the attribute
836 * buffer length to be backfilled at the end of this routine. 818 * buffer length to be backfilled at the end of this routine.
837 */ 819 */
838 WRITE32(2); 820 *p++ = cpu_to_be32(2);
839 q = p; 821 q = p;
840 p += 3; 822 p += 3;
841 823
842 if (iap->ia_valid & ATTR_SIZE) { 824 if (iap->ia_valid & ATTR_SIZE) {
843 bmval0 |= FATTR4_WORD0_SIZE; 825 bmval0 |= FATTR4_WORD0_SIZE;
844 WRITE64(iap->ia_size); 826 p = xdr_encode_hyper(p, iap->ia_size);
845 } 827 }
846 if (iap->ia_valid & ATTR_MODE) { 828 if (iap->ia_valid & ATTR_MODE) {
847 bmval1 |= FATTR4_WORD1_MODE; 829 bmval1 |= FATTR4_WORD1_MODE;
848 WRITE32(iap->ia_mode & S_IALLUGO); 830 *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO);
849 } 831 }
850 if (iap->ia_valid & ATTR_UID) { 832 if (iap->ia_valid & ATTR_UID) {
851 bmval1 |= FATTR4_WORD1_OWNER; 833 bmval1 |= FATTR4_WORD1_OWNER;
852 WRITE32(owner_namelen); 834 p = xdr_encode_opaque(p, owner_name, owner_namelen);
853 WRITEMEM(owner_name, owner_namelen);
854 } 835 }
855 if (iap->ia_valid & ATTR_GID) { 836 if (iap->ia_valid & ATTR_GID) {
856 bmval1 |= FATTR4_WORD1_OWNER_GROUP; 837 bmval1 |= FATTR4_WORD1_OWNER_GROUP;
857 WRITE32(owner_grouplen); 838 p = xdr_encode_opaque(p, owner_group, owner_grouplen);
858 WRITEMEM(owner_group, owner_grouplen);
859 } 839 }
860 if (iap->ia_valid & ATTR_ATIME_SET) { 840 if (iap->ia_valid & ATTR_ATIME_SET) {
861 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 841 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
862 WRITE32(NFS4_SET_TO_CLIENT_TIME); 842 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
863 WRITE32(0); 843 *p++ = cpu_to_be32(0);
864 WRITE32(iap->ia_mtime.tv_sec); 844 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
865 WRITE32(iap->ia_mtime.tv_nsec); 845 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
866 } 846 }
867 else if (iap->ia_valid & ATTR_ATIME) { 847 else if (iap->ia_valid & ATTR_ATIME) {
868 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 848 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
869 WRITE32(NFS4_SET_TO_SERVER_TIME); 849 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
870 } 850 }
871 if (iap->ia_valid & ATTR_MTIME_SET) { 851 if (iap->ia_valid & ATTR_MTIME_SET) {
872 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 852 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
873 WRITE32(NFS4_SET_TO_CLIENT_TIME); 853 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
874 WRITE32(0); 854 *p++ = cpu_to_be32(0);
875 WRITE32(iap->ia_mtime.tv_sec); 855 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
876 WRITE32(iap->ia_mtime.tv_nsec); 856 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
877 } 857 }
878 else if (iap->ia_valid & ATTR_MTIME) { 858 else if (iap->ia_valid & ATTR_MTIME) {
879 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 859 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
880 WRITE32(NFS4_SET_TO_SERVER_TIME); 860 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
881 } 861 }
882 862
883 /* 863 /*
@@ -891,7 +871,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
891 len = (char *)p - (char *)q - 12; 871 len = (char *)p - (char *)q - 12;
892 *q++ = htonl(bmval0); 872 *q++ = htonl(bmval0);
893 *q++ = htonl(bmval1); 873 *q++ = htonl(bmval1);
894 *q++ = htonl(len); 874 *q = htonl(len);
895 875
896/* out: */ 876/* out: */
897} 877}
@@ -900,9 +880,9 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd
900{ 880{
901 __be32 *p; 881 __be32 *p;
902 882
903 RESERVE_SPACE(8); 883 p = reserve_space(xdr, 8);
904 WRITE32(OP_ACCESS); 884 *p++ = cpu_to_be32(OP_ACCESS);
905 WRITE32(access); 885 *p = cpu_to_be32(access);
906 hdr->nops++; 886 hdr->nops++;
907 hdr->replen += decode_access_maxsz; 887 hdr->replen += decode_access_maxsz;
908} 888}
@@ -911,10 +891,10 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
911{ 891{
912 __be32 *p; 892 __be32 *p;
913 893
914 RESERVE_SPACE(8+NFS4_STATEID_SIZE); 894 p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
915 WRITE32(OP_CLOSE); 895 *p++ = cpu_to_be32(OP_CLOSE);
916 WRITE32(arg->seqid->sequence->counter); 896 *p++ = cpu_to_be32(arg->seqid->sequence->counter);
917 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 897 xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
918 hdr->nops++; 898 hdr->nops++;
919 hdr->replen += decode_close_maxsz; 899 hdr->replen += decode_close_maxsz;
920} 900}
@@ -923,10 +903,10 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar
923{ 903{
924 __be32 *p; 904 __be32 *p;
925 905
926 RESERVE_SPACE(16); 906 p = reserve_space(xdr, 16);
927 WRITE32(OP_COMMIT); 907 *p++ = cpu_to_be32(OP_COMMIT);
928 WRITE64(args->offset); 908 p = xdr_encode_hyper(p, args->offset);
929 WRITE32(args->count); 909 *p = cpu_to_be32(args->count);
930 hdr->nops++; 910 hdr->nops++;
931 hdr->replen += decode_commit_maxsz; 911 hdr->replen += decode_commit_maxsz;
932} 912}
@@ -935,30 +915,28 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
935{ 915{
936 __be32 *p; 916 __be32 *p;
937 917
938 RESERVE_SPACE(8); 918 p = reserve_space(xdr, 8);
939 WRITE32(OP_CREATE); 919 *p++ = cpu_to_be32(OP_CREATE);
940 WRITE32(create->ftype); 920 *p = cpu_to_be32(create->ftype);
941 921
942 switch (create->ftype) { 922 switch (create->ftype) {
943 case NF4LNK: 923 case NF4LNK:
944 RESERVE_SPACE(4); 924 p = reserve_space(xdr, 4);
945 WRITE32(create->u.symlink.len); 925 *p = cpu_to_be32(create->u.symlink.len);
946 xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len); 926 xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
947 break; 927 break;
948 928
949 case NF4BLK: case NF4CHR: 929 case NF4BLK: case NF4CHR:
950 RESERVE_SPACE(8); 930 p = reserve_space(xdr, 8);
951 WRITE32(create->u.device.specdata1); 931 *p++ = cpu_to_be32(create->u.device.specdata1);
952 WRITE32(create->u.device.specdata2); 932 *p = cpu_to_be32(create->u.device.specdata2);
953 break; 933 break;
954 934
955 default: 935 default:
956 break; 936 break;
957 } 937 }
958 938
959 RESERVE_SPACE(4 + create->name->len); 939 encode_string(xdr, create->name->len, create->name->name);
960 WRITE32(create->name->len);
961 WRITEMEM(create->name->name, create->name->len);
962 hdr->nops++; 940 hdr->nops++;
963 hdr->replen += decode_create_maxsz; 941 hdr->replen += decode_create_maxsz;
964 942
@@ -969,10 +947,10 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
969{ 947{
970 __be32 *p; 948 __be32 *p;
971 949
972 RESERVE_SPACE(12); 950 p = reserve_space(xdr, 12);
973 WRITE32(OP_GETATTR); 951 *p++ = cpu_to_be32(OP_GETATTR);
974 WRITE32(1); 952 *p++ = cpu_to_be32(1);
975 WRITE32(bitmap); 953 *p = cpu_to_be32(bitmap);
976 hdr->nops++; 954 hdr->nops++;
977 hdr->replen += decode_getattr_maxsz; 955 hdr->replen += decode_getattr_maxsz;
978} 956}
@@ -981,11 +959,11 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm
981{ 959{
982 __be32 *p; 960 __be32 *p;
983 961
984 RESERVE_SPACE(16); 962 p = reserve_space(xdr, 16);
985 WRITE32(OP_GETATTR); 963 *p++ = cpu_to_be32(OP_GETATTR);
986 WRITE32(2); 964 *p++ = cpu_to_be32(2);
987 WRITE32(bm0); 965 *p++ = cpu_to_be32(bm0);
988 WRITE32(bm1); 966 *p = cpu_to_be32(bm1);
989 hdr->nops++; 967 hdr->nops++;
990 hdr->replen += decode_getattr_maxsz; 968 hdr->replen += decode_getattr_maxsz;
991} 969}
@@ -1012,8 +990,8 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1012{ 990{
1013 __be32 *p; 991 __be32 *p;
1014 992
1015 RESERVE_SPACE(4); 993 p = reserve_space(xdr, 4);
1016 WRITE32(OP_GETFH); 994 *p = cpu_to_be32(OP_GETFH);
1017 hdr->nops++; 995 hdr->nops++;
1018 hdr->replen += decode_getfh_maxsz; 996 hdr->replen += decode_getfh_maxsz;
1019} 997}
@@ -1022,10 +1000,9 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
1022{ 1000{
1023 __be32 *p; 1001 __be32 *p;
1024 1002
1025 RESERVE_SPACE(8 + name->len); 1003 p = reserve_space(xdr, 8 + name->len);
1026 WRITE32(OP_LINK); 1004 *p++ = cpu_to_be32(OP_LINK);
1027 WRITE32(name->len); 1005 xdr_encode_opaque(p, name->name, name->len);
1028 WRITEMEM(name->name, name->len);
1029 hdr->nops++; 1006 hdr->nops++;
1030 hdr->replen += decode_link_maxsz; 1007 hdr->replen += decode_link_maxsz;
1031} 1008}
@@ -1052,27 +1029,27 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
1052{ 1029{
1053 __be32 *p; 1030 __be32 *p;
1054 1031
1055 RESERVE_SPACE(32); 1032 p = reserve_space(xdr, 32);
1056 WRITE32(OP_LOCK); 1033 *p++ = cpu_to_be32(OP_LOCK);
1057 WRITE32(nfs4_lock_type(args->fl, args->block)); 1034 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block));
1058 WRITE32(args->reclaim); 1035 *p++ = cpu_to_be32(args->reclaim);
1059 WRITE64(args->fl->fl_start); 1036 p = xdr_encode_hyper(p, args->fl->fl_start);
1060 WRITE64(nfs4_lock_length(args->fl)); 1037 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1061 WRITE32(args->new_lock_owner); 1038 *p = cpu_to_be32(args->new_lock_owner);
1062 if (args->new_lock_owner){ 1039 if (args->new_lock_owner){
1063 RESERVE_SPACE(4+NFS4_STATEID_SIZE+32); 1040 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
1064 WRITE32(args->open_seqid->sequence->counter); 1041 *p++ = cpu_to_be32(args->open_seqid->sequence->counter);
1065 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); 1042 p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
1066 WRITE32(args->lock_seqid->sequence->counter); 1043 *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
1067 WRITE64(args->lock_owner.clientid); 1044 p = xdr_encode_hyper(p, args->lock_owner.clientid);
1068 WRITE32(16); 1045 *p++ = cpu_to_be32(16);
1069 WRITEMEM("lock id:", 8); 1046 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1070 WRITE64(args->lock_owner.id); 1047 xdr_encode_hyper(p, args->lock_owner.id);
1071 } 1048 }
1072 else { 1049 else {
1073 RESERVE_SPACE(NFS4_STATEID_SIZE+4); 1050 p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
1074 WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE); 1051 p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
1075 WRITE32(args->lock_seqid->sequence->counter); 1052 *p = cpu_to_be32(args->lock_seqid->sequence->counter);
1076 } 1053 }
1077 hdr->nops++; 1054 hdr->nops++;
1078 hdr->replen += decode_lock_maxsz; 1055 hdr->replen += decode_lock_maxsz;
@@ -1082,15 +1059,15 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar
1082{ 1059{
1083 __be32 *p; 1060 __be32 *p;
1084 1061
1085 RESERVE_SPACE(52); 1062 p = reserve_space(xdr, 52);
1086 WRITE32(OP_LOCKT); 1063 *p++ = cpu_to_be32(OP_LOCKT);
1087 WRITE32(nfs4_lock_type(args->fl, 0)); 1064 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1088 WRITE64(args->fl->fl_start); 1065 p = xdr_encode_hyper(p, args->fl->fl_start);
1089 WRITE64(nfs4_lock_length(args->fl)); 1066 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1090 WRITE64(args->lock_owner.clientid); 1067 p = xdr_encode_hyper(p, args->lock_owner.clientid);
1091 WRITE32(16); 1068 *p++ = cpu_to_be32(16);
1092 WRITEMEM("lock id:", 8); 1069 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1093 WRITE64(args->lock_owner.id); 1070 xdr_encode_hyper(p, args->lock_owner.id);
1094 hdr->nops++; 1071 hdr->nops++;
1095 hdr->replen += decode_lockt_maxsz; 1072 hdr->replen += decode_lockt_maxsz;
1096} 1073}
@@ -1099,13 +1076,13 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
1099{ 1076{
1100 __be32 *p; 1077 __be32 *p;
1101 1078
1102 RESERVE_SPACE(12+NFS4_STATEID_SIZE+16); 1079 p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16);
1103 WRITE32(OP_LOCKU); 1080 *p++ = cpu_to_be32(OP_LOCKU);
1104 WRITE32(nfs4_lock_type(args->fl, 0)); 1081 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1105 WRITE32(args->seqid->sequence->counter); 1082 *p++ = cpu_to_be32(args->seqid->sequence->counter);
1106 WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE); 1083 p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
1107 WRITE64(args->fl->fl_start); 1084 p = xdr_encode_hyper(p, args->fl->fl_start);
1108 WRITE64(nfs4_lock_length(args->fl)); 1085 xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1109 hdr->nops++; 1086 hdr->nops++;
1110 hdr->replen += decode_locku_maxsz; 1087 hdr->replen += decode_locku_maxsz;
1111} 1088}
@@ -1115,10 +1092,9 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
1115 int len = name->len; 1092 int len = name->len;
1116 __be32 *p; 1093 __be32 *p;
1117 1094
1118 RESERVE_SPACE(8 + len); 1095 p = reserve_space(xdr, 8 + len);
1119 WRITE32(OP_LOOKUP); 1096 *p++ = cpu_to_be32(OP_LOOKUP);
1120 WRITE32(len); 1097 xdr_encode_opaque(p, name->name, len);
1121 WRITEMEM(name->name, len);
1122 hdr->nops++; 1098 hdr->nops++;
1123 hdr->replen += decode_lookup_maxsz; 1099 hdr->replen += decode_lookup_maxsz;
1124} 1100}
@@ -1127,21 +1103,21 @@ static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
1127{ 1103{
1128 __be32 *p; 1104 __be32 *p;
1129 1105
1130 RESERVE_SPACE(8); 1106 p = reserve_space(xdr, 8);
1131 switch (fmode & (FMODE_READ|FMODE_WRITE)) { 1107 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
1132 case FMODE_READ: 1108 case FMODE_READ:
1133 WRITE32(NFS4_SHARE_ACCESS_READ); 1109 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_READ);
1134 break; 1110 break;
1135 case FMODE_WRITE: 1111 case FMODE_WRITE:
1136 WRITE32(NFS4_SHARE_ACCESS_WRITE); 1112 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_WRITE);
1137 break; 1113 break;
1138 case FMODE_READ|FMODE_WRITE: 1114 case FMODE_READ|FMODE_WRITE:
1139 WRITE32(NFS4_SHARE_ACCESS_BOTH); 1115 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_BOTH);
1140 break; 1116 break;
1141 default: 1117 default:
1142 WRITE32(0); 1118 *p++ = cpu_to_be32(0);
1143 } 1119 }
1144 WRITE32(0); /* for linux, share_deny = 0 always */ 1120 *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */
1145} 1121}
1146 1122
1147static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1123static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1151,29 +1127,29 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
1151 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, 1127 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
1152 * owner 4 = 32 1128 * owner 4 = 32
1153 */ 1129 */
1154 RESERVE_SPACE(8); 1130 p = reserve_space(xdr, 8);
1155 WRITE32(OP_OPEN); 1131 *p++ = cpu_to_be32(OP_OPEN);
1156 WRITE32(arg->seqid->sequence->counter); 1132 *p = cpu_to_be32(arg->seqid->sequence->counter);
1157 encode_share_access(xdr, arg->fmode); 1133 encode_share_access(xdr, arg->fmode);
1158 RESERVE_SPACE(28); 1134 p = reserve_space(xdr, 28);
1159 WRITE64(arg->clientid); 1135 p = xdr_encode_hyper(p, arg->clientid);
1160 WRITE32(16); 1136 *p++ = cpu_to_be32(16);
1161 WRITEMEM("open id:", 8); 1137 p = xdr_encode_opaque_fixed(p, "open id:", 8);
1162 WRITE64(arg->id); 1138 xdr_encode_hyper(p, arg->id);
1163} 1139}
1164 1140
1165static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1141static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
1166{ 1142{
1167 __be32 *p; 1143 __be32 *p;
1168 1144
1169 RESERVE_SPACE(4); 1145 p = reserve_space(xdr, 4);
1170 switch(arg->open_flags & O_EXCL) { 1146 switch(arg->open_flags & O_EXCL) {
1171 case 0: 1147 case 0:
1172 WRITE32(NFS4_CREATE_UNCHECKED); 1148 *p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
1173 encode_attrs(xdr, arg->u.attrs, arg->server); 1149 encode_attrs(xdr, arg->u.attrs, arg->server);
1174 break; 1150 break;
1175 default: 1151 default:
1176 WRITE32(NFS4_CREATE_EXCLUSIVE); 1152 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
1177 encode_nfs4_verifier(xdr, &arg->u.verifier); 1153 encode_nfs4_verifier(xdr, &arg->u.verifier);
1178 } 1154 }
1179} 1155}
@@ -1182,14 +1158,14 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
1182{ 1158{
1183 __be32 *p; 1159 __be32 *p;
1184 1160
1185 RESERVE_SPACE(4); 1161 p = reserve_space(xdr, 4);
1186 switch (arg->open_flags & O_CREAT) { 1162 switch (arg->open_flags & O_CREAT) {
1187 case 0: 1163 case 0:
1188 WRITE32(NFS4_OPEN_NOCREATE); 1164 *p = cpu_to_be32(NFS4_OPEN_NOCREATE);
1189 break; 1165 break;
1190 default: 1166 default:
1191 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); 1167 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
1192 WRITE32(NFS4_OPEN_CREATE); 1168 *p = cpu_to_be32(NFS4_OPEN_CREATE);
1193 encode_createmode(xdr, arg); 1169 encode_createmode(xdr, arg);
1194 } 1170 }
1195} 1171}
@@ -1198,16 +1174,16 @@ static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delega
1198{ 1174{
1199 __be32 *p; 1175 __be32 *p;
1200 1176
1201 RESERVE_SPACE(4); 1177 p = reserve_space(xdr, 4);
1202 switch (delegation_type) { 1178 switch (delegation_type) {
1203 case 0: 1179 case 0:
1204 WRITE32(NFS4_OPEN_DELEGATE_NONE); 1180 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE);
1205 break; 1181 break;
1206 case FMODE_READ: 1182 case FMODE_READ:
1207 WRITE32(NFS4_OPEN_DELEGATE_READ); 1183 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ);
1208 break; 1184 break;
1209 case FMODE_WRITE|FMODE_READ: 1185 case FMODE_WRITE|FMODE_READ:
1210 WRITE32(NFS4_OPEN_DELEGATE_WRITE); 1186 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE);
1211 break; 1187 break;
1212 default: 1188 default:
1213 BUG(); 1189 BUG();
@@ -1218,8 +1194,8 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
1218{ 1194{
1219 __be32 *p; 1195 __be32 *p;
1220 1196
1221 RESERVE_SPACE(4); 1197 p = reserve_space(xdr, 4);
1222 WRITE32(NFS4_OPEN_CLAIM_NULL); 1198 *p = cpu_to_be32(NFS4_OPEN_CLAIM_NULL);
1223 encode_string(xdr, name->len, name->name); 1199 encode_string(xdr, name->len, name->name);
1224} 1200}
1225 1201
@@ -1227,8 +1203,8 @@ static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
1227{ 1203{
1228 __be32 *p; 1204 __be32 *p;
1229 1205
1230 RESERVE_SPACE(4); 1206 p = reserve_space(xdr, 4);
1231 WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); 1207 *p = cpu_to_be32(NFS4_OPEN_CLAIM_PREVIOUS);
1232 encode_delegation_type(xdr, type); 1208 encode_delegation_type(xdr, type);
1233} 1209}
1234 1210
@@ -1236,9 +1212,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
1236{ 1212{
1237 __be32 *p; 1213 __be32 *p;
1238 1214
1239 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1215 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1240 WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); 1216 *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
1241 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1217 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
1242 encode_string(xdr, name->len, name->name); 1218 encode_string(xdr, name->len, name->name);
1243} 1219}
1244 1220
@@ -1267,10 +1243,10 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
1267{ 1243{
1268 __be32 *p; 1244 __be32 *p;
1269 1245
1270 RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); 1246 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
1271 WRITE32(OP_OPEN_CONFIRM); 1247 *p++ = cpu_to_be32(OP_OPEN_CONFIRM);
1272 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1248 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1273 WRITE32(arg->seqid->sequence->counter); 1249 *p = cpu_to_be32(arg->seqid->sequence->counter);
1274 hdr->nops++; 1250 hdr->nops++;
1275 hdr->replen += decode_open_confirm_maxsz; 1251 hdr->replen += decode_open_confirm_maxsz;
1276} 1252}
@@ -1279,10 +1255,10 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close
1279{ 1255{
1280 __be32 *p; 1256 __be32 *p;
1281 1257
1282 RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); 1258 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
1283 WRITE32(OP_OPEN_DOWNGRADE); 1259 *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
1284 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1260 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1285 WRITE32(arg->seqid->sequence->counter); 1261 *p = cpu_to_be32(arg->seqid->sequence->counter);
1286 encode_share_access(xdr, arg->fmode); 1262 encode_share_access(xdr, arg->fmode);
1287 hdr->nops++; 1263 hdr->nops++;
1288 hdr->replen += decode_open_downgrade_maxsz; 1264 hdr->replen += decode_open_downgrade_maxsz;
@@ -1294,10 +1270,9 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd
1294 int len = fh->size; 1270 int len = fh->size;
1295 __be32 *p; 1271 __be32 *p;
1296 1272
1297 RESERVE_SPACE(8 + len); 1273 p = reserve_space(xdr, 8 + len);
1298 WRITE32(OP_PUTFH); 1274 *p++ = cpu_to_be32(OP_PUTFH);
1299 WRITE32(len); 1275 xdr_encode_opaque(p, fh->data, len);
1300 WRITEMEM(fh->data, len);
1301 hdr->nops++; 1276 hdr->nops++;
1302 hdr->replen += decode_putfh_maxsz; 1277 hdr->replen += decode_putfh_maxsz;
1303} 1278}
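
The encode-side hunks above and below all apply the same conversion: the RESERVE_SPACE()/WRITE32()/WRITEMEM() macros, which updated a hidden local pointer, give way to an explicit p = reserve_space(xdr, nbytes) (presumably a thin wrapper around xdr_reserve_space() that BUGs on failure) followed by direct stores through p via cpu_to_be32(), xdr_encode_hyper() and xdr_encode_opaque{,_fixed}(). A minimal sketch of the two styles; OP_EXAMPLE and the field layout are illustrative, not taken from the patch:

/* Old style: the macros roughly expand to the pointer operations noted. */
static void encode_example_old(struct xdr_stream *xdr, const nfs4_stateid *stateid)
{
        __be32 *p;

        RESERVE_SPACE(4 + NFS4_STATEID_SIZE);           /* p = xdr_reserve_space(...); BUG_ON(!p) */
        WRITE32(OP_EXAMPLE);                            /* *p++ = htonl(OP_EXAMPLE) */
        WRITEMEM(stateid->data, NFS4_STATEID_SIZE);     /* copy the bytes, advance p */
}

/* New style: the same bytes on the wire, with the pointer handling spelled out. */
static void encode_example_new(struct xdr_stream *xdr, const nfs4_stateid *stateid)
{
        __be32 *p;

        p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE);
        *p++ = cpu_to_be32(OP_EXAMPLE);
        xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
}
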
@@ -1306,8 +1281,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1306{ 1281{
1307 __be32 *p; 1282 __be32 *p;
1308 1283
1309 RESERVE_SPACE(4); 1284 p = reserve_space(xdr, 4);
1310 WRITE32(OP_PUTROOTFH); 1285 *p = cpu_to_be32(OP_PUTROOTFH);
1311 hdr->nops++; 1286 hdr->nops++;
1312 hdr->replen += decode_putrootfh_maxsz; 1287 hdr->replen += decode_putrootfh_maxsz;
1313} 1288}
@@ -1317,26 +1292,26 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1317 nfs4_stateid stateid; 1292 nfs4_stateid stateid;
1318 __be32 *p; 1293 __be32 *p;
1319 1294
1320 RESERVE_SPACE(NFS4_STATEID_SIZE); 1295 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1321 if (ctx->state != NULL) { 1296 if (ctx->state != NULL) {
1322 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); 1297 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
1323 WRITEMEM(stateid.data, NFS4_STATEID_SIZE); 1298 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
1324 } else 1299 } else
1325 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); 1300 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
1326} 1301}
1327 1302
1328static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) 1303static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
1329{ 1304{
1330 __be32 *p; 1305 __be32 *p;
1331 1306
1332 RESERVE_SPACE(4); 1307 p = reserve_space(xdr, 4);
1333 WRITE32(OP_READ); 1308 *p = cpu_to_be32(OP_READ);
1334 1309
1335 encode_stateid(xdr, args->context); 1310 encode_stateid(xdr, args->context);
1336 1311
1337 RESERVE_SPACE(12); 1312 p = reserve_space(xdr, 12);
1338 WRITE64(args->offset); 1313 p = xdr_encode_hyper(p, args->offset);
1339 WRITE32(args->count); 1314 *p = cpu_to_be32(args->count);
1340 hdr->nops++; 1315 hdr->nops++;
1341 hdr->replen += decode_read_maxsz; 1316 hdr->replen += decode_read_maxsz;
1342} 1317}
@@ -1349,20 +1324,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1349 }; 1324 };
1350 __be32 *p; 1325 __be32 *p;
1351 1326
1352 RESERVE_SPACE(12+NFS4_VERIFIER_SIZE+20); 1327 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1353 WRITE32(OP_READDIR); 1328 *p++ = cpu_to_be32(OP_READDIR);
1354 WRITE64(readdir->cookie); 1329 p = xdr_encode_hyper(p, readdir->cookie);
1355 WRITEMEM(readdir->verifier.data, NFS4_VERIFIER_SIZE); 1330 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
1356 WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ 1331 *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */
1357 WRITE32(readdir->count); 1332 *p++ = cpu_to_be32(readdir->count);
1358 WRITE32(2); 1333 *p++ = cpu_to_be32(2);
1359 /* Switch to mounted_on_fileid if the server supports it */ 1334 /* Switch to mounted_on_fileid if the server supports it */
1360 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) 1335 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1361 attrs[0] &= ~FATTR4_WORD0_FILEID; 1336 attrs[0] &= ~FATTR4_WORD0_FILEID;
1362 else 1337 else
1363 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 1338 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1364 WRITE32(attrs[0] & readdir->bitmask[0]); 1339 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
1365 WRITE32(attrs[1] & readdir->bitmask[1]); 1340 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
1366 hdr->nops++; 1341 hdr->nops++;
1367 hdr->replen += decode_readdir_maxsz; 1342 hdr->replen += decode_readdir_maxsz;
1368 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", 1343 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
@@ -1378,8 +1353,8 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *
1378{ 1353{
1379 __be32 *p; 1354 __be32 *p;
1380 1355
1381 RESERVE_SPACE(4); 1356 p = reserve_space(xdr, 4);
1382 WRITE32(OP_READLINK); 1357 *p = cpu_to_be32(OP_READLINK);
1383 hdr->nops++; 1358 hdr->nops++;
1384 hdr->replen += decode_readlink_maxsz; 1359 hdr->replen += decode_readlink_maxsz;
1385} 1360}
@@ -1388,10 +1363,9 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc
1388{ 1363{
1389 __be32 *p; 1364 __be32 *p;
1390 1365
1391 RESERVE_SPACE(8 + name->len); 1366 p = reserve_space(xdr, 8 + name->len);
1392 WRITE32(OP_REMOVE); 1367 *p++ = cpu_to_be32(OP_REMOVE);
1393 WRITE32(name->len); 1368 xdr_encode_opaque(p, name->name, name->len);
1394 WRITEMEM(name->name, name->len);
1395 hdr->nops++; 1369 hdr->nops++;
1396 hdr->replen += decode_remove_maxsz; 1370 hdr->replen += decode_remove_maxsz;
1397} 1371}
@@ -1400,14 +1374,10 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co
1400{ 1374{
1401 __be32 *p; 1375 __be32 *p;
1402 1376
1403 RESERVE_SPACE(8 + oldname->len); 1377 p = reserve_space(xdr, 4);
1404 WRITE32(OP_RENAME); 1378 *p = cpu_to_be32(OP_RENAME);
1405 WRITE32(oldname->len); 1379 encode_string(xdr, oldname->len, oldname->name);
1406 WRITEMEM(oldname->name, oldname->len); 1380 encode_string(xdr, newname->len, newname->name);
1407
1408 RESERVE_SPACE(4 + newname->len);
1409 WRITE32(newname->len);
1410 WRITEMEM(newname->name, newname->len);
1411 hdr->nops++; 1381 hdr->nops++;
1412 hdr->replen += decode_rename_maxsz; 1382 hdr->replen += decode_rename_maxsz;
1413} 1383}
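
encode_rename() goes a step further: instead of reserving space and writing the length word and name bytes by hand for oldname and newname, it now calls encode_string() for each. That helper lives elsewhere in nfs4xdr.c and is not part of this hunk; a sketch of its likely shape, for orientation only:

static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
{
        __be32 *p;

        /* xdr_encode_opaque() writes the 4-byte length, copies the data and
         * zero-pads it to the next 4-byte boundary. */
        p = reserve_space(xdr, 4 + len);
        xdr_encode_opaque(p, str, len);
}
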
@@ -1416,9 +1386,9 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client
1416{ 1386{
1417 __be32 *p; 1387 __be32 *p;
1418 1388
1419 RESERVE_SPACE(12); 1389 p = reserve_space(xdr, 12);
1420 WRITE32(OP_RENEW); 1390 *p++ = cpu_to_be32(OP_RENEW);
1421 WRITE64(client_stateid->cl_clientid); 1391 xdr_encode_hyper(p, client_stateid->cl_clientid);
1422 hdr->nops++; 1392 hdr->nops++;
1423 hdr->replen += decode_renew_maxsz; 1393 hdr->replen += decode_renew_maxsz;
1424} 1394}
@@ -1428,8 +1398,8 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1428{ 1398{
1429 __be32 *p; 1399 __be32 *p;
1430 1400
1431 RESERVE_SPACE(4); 1401 p = reserve_space(xdr, 4);
1432 WRITE32(OP_RESTOREFH); 1402 *p = cpu_to_be32(OP_RESTOREFH);
1433 hdr->nops++; 1403 hdr->nops++;
1434 hdr->replen += decode_restorefh_maxsz; 1404 hdr->replen += decode_restorefh_maxsz;
1435} 1405}
@@ -1439,16 +1409,16 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
1439{ 1409{
1440 __be32 *p; 1410 __be32 *p;
1441 1411
1442 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1412 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1443 WRITE32(OP_SETATTR); 1413 *p++ = cpu_to_be32(OP_SETATTR);
1444 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); 1414 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
1445 RESERVE_SPACE(2*4); 1415 p = reserve_space(xdr, 2*4);
1446 WRITE32(1); 1416 *p++ = cpu_to_be32(1);
1447 WRITE32(FATTR4_WORD0_ACL); 1417 *p = cpu_to_be32(FATTR4_WORD0_ACL);
1448 if (arg->acl_len % 4) 1418 if (arg->acl_len % 4)
1449 return -EINVAL; 1419 return -EINVAL;
1450 RESERVE_SPACE(4); 1420 p = reserve_space(xdr, 4);
1451 WRITE32(arg->acl_len); 1421 *p = cpu_to_be32(arg->acl_len);
1452 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); 1422 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
1453 hdr->nops++; 1423 hdr->nops++;
1454 hdr->replen += decode_setacl_maxsz; 1424 hdr->replen += decode_setacl_maxsz;
@@ -1460,8 +1430,8 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1460{ 1430{
1461 __be32 *p; 1431 __be32 *p;
1462 1432
1463 RESERVE_SPACE(4); 1433 p = reserve_space(xdr, 4);
1464 WRITE32(OP_SAVEFH); 1434 *p = cpu_to_be32(OP_SAVEFH);
1465 hdr->nops++; 1435 hdr->nops++;
1466 hdr->replen += decode_savefh_maxsz; 1436 hdr->replen += decode_savefh_maxsz;
1467} 1437}
@@ -1470,9 +1440,9 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
1470{ 1440{
1471 __be32 *p; 1441 __be32 *p;
1472 1442
1473 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1443 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1474 WRITE32(OP_SETATTR); 1444 *p++ = cpu_to_be32(OP_SETATTR);
1475 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); 1445 xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
1476 hdr->nops++; 1446 hdr->nops++;
1477 hdr->replen += decode_setattr_maxsz; 1447 hdr->replen += decode_setattr_maxsz;
1478 encode_attrs(xdr, arg->iap, server); 1448 encode_attrs(xdr, arg->iap, server);
@@ -1482,17 +1452,17 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1482{ 1452{
1483 __be32 *p; 1453 __be32 *p;
1484 1454
1485 RESERVE_SPACE(4 + NFS4_VERIFIER_SIZE); 1455 p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE);
1486 WRITE32(OP_SETCLIENTID); 1456 *p++ = cpu_to_be32(OP_SETCLIENTID);
1487 WRITEMEM(setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); 1457 xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
1488 1458
1489 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); 1459 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
1490 RESERVE_SPACE(4); 1460 p = reserve_space(xdr, 4);
1491 WRITE32(setclientid->sc_prog); 1461 *p = cpu_to_be32(setclientid->sc_prog);
1492 encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); 1462 encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
1493 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); 1463 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
1494 RESERVE_SPACE(4); 1464 p = reserve_space(xdr, 4);
1495 WRITE32(setclientid->sc_cb_ident); 1465 *p = cpu_to_be32(setclientid->sc_cb_ident);
1496 hdr->nops++; 1466 hdr->nops++;
1497 hdr->replen += decode_setclientid_maxsz; 1467 hdr->replen += decode_setclientid_maxsz;
1498} 1468}
@@ -1501,10 +1471,10 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_
1501{ 1471{
1502 __be32 *p; 1472 __be32 *p;
1503 1473
1504 RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE); 1474 p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
1505 WRITE32(OP_SETCLIENTID_CONFIRM); 1475 *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
1506 WRITE64(client_state->cl_clientid); 1476 p = xdr_encode_hyper(p, client_state->cl_clientid);
1507 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); 1477 xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
1508 hdr->nops++; 1478 hdr->nops++;
1509 hdr->replen += decode_setclientid_confirm_maxsz; 1479 hdr->replen += decode_setclientid_confirm_maxsz;
1510} 1480}
@@ -1513,15 +1483,15 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1513{ 1483{
1514 __be32 *p; 1484 __be32 *p;
1515 1485
1516 RESERVE_SPACE(4); 1486 p = reserve_space(xdr, 4);
1517 WRITE32(OP_WRITE); 1487 *p = cpu_to_be32(OP_WRITE);
1518 1488
1519 encode_stateid(xdr, args->context); 1489 encode_stateid(xdr, args->context);
1520 1490
1521 RESERVE_SPACE(16); 1491 p = reserve_space(xdr, 16);
1522 WRITE64(args->offset); 1492 p = xdr_encode_hyper(p, args->offset);
1523 WRITE32(args->stable); 1493 *p++ = cpu_to_be32(args->stable);
1524 WRITE32(args->count); 1494 *p = cpu_to_be32(args->count);
1525 1495
1526 xdr_write_pages(xdr, args->pages, args->pgbase, args->count); 1496 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
1527 hdr->nops++; 1497 hdr->nops++;
@@ -1532,10 +1502,10 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
1532{ 1502{
1533 __be32 *p; 1503 __be32 *p;
1534 1504
1535 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1505 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1536 1506
1537 WRITE32(OP_DELEGRETURN); 1507 *p++ = cpu_to_be32(OP_DELEGRETURN);
1538 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1508 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
1539 hdr->nops++; 1509 hdr->nops++;
1540 hdr->replen += decode_delegreturn_maxsz; 1510 hdr->replen += decode_delegreturn_maxsz;
1541} 1511}
@@ -1548,16 +1518,16 @@ static void encode_exchange_id(struct xdr_stream *xdr,
1548{ 1518{
1549 __be32 *p; 1519 __be32 *p;
1550 1520
1551 RESERVE_SPACE(4 + sizeof(args->verifier->data)); 1521 p = reserve_space(xdr, 4 + sizeof(args->verifier->data));
1552 WRITE32(OP_EXCHANGE_ID); 1522 *p++ = cpu_to_be32(OP_EXCHANGE_ID);
1553 WRITEMEM(args->verifier->data, sizeof(args->verifier->data)); 1523 xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data));
1554 1524
1555 encode_string(xdr, args->id_len, args->id); 1525 encode_string(xdr, args->id_len, args->id);
1556 1526
1557 RESERVE_SPACE(12); 1527 p = reserve_space(xdr, 12);
1558 WRITE32(args->flags); 1528 *p++ = cpu_to_be32(args->flags);
1559 WRITE32(0); /* zero length state_protect4_a */ 1529 *p++ = cpu_to_be32(0); /* zero length state_protect4_a */
1560 WRITE32(0); /* zero length implementation id array */ 1530 *p = cpu_to_be32(0); /* zero length implementation id array */
1561 hdr->nops++; 1531 hdr->nops++;
1562 hdr->replen += decode_exchange_id_maxsz; 1532 hdr->replen += decode_exchange_id_maxsz;
1563} 1533}
@@ -1571,55 +1541,43 @@ static void encode_create_session(struct xdr_stream *xdr,
1571 uint32_t len; 1541 uint32_t len;
1572 struct nfs_client *clp = args->client; 1542 struct nfs_client *clp = args->client;
1573 1543
1574 RESERVE_SPACE(4); 1544 len = scnprintf(machine_name, sizeof(machine_name), "%s",
1575 WRITE32(OP_CREATE_SESSION); 1545 clp->cl_ipaddr);
1576
1577 RESERVE_SPACE(8);
1578 WRITE64(clp->cl_ex_clid);
1579 1546
1580 RESERVE_SPACE(8); 1547 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1581 WRITE32(clp->cl_seqid); /*Sequence id */ 1548 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1582 WRITE32(args->flags); /*flags */ 1549 p = xdr_encode_hyper(p, clp->cl_ex_clid);
1550 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1551 *p++ = cpu_to_be32(args->flags); /*flags */
1583 1552
1584 RESERVE_SPACE(2*28); /* 2 channel_attrs */
1585 /* Fore Channel */ 1553 /* Fore Channel */
1586 WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ 1554 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
1587 WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */ 1555 *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */
1588 WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */ 1556 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */
1589 WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ 1557 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */
1590 WRITE32(args->fc_attrs.max_ops); /* max operations */ 1558 *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */
1591 WRITE32(args->fc_attrs.max_reqs); /* max requests */ 1559 *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */
1592 WRITE32(0); /* rdmachannel_attrs */ 1560 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
1593 1561
1594 /* Back Channel */ 1562 /* Back Channel */
1595 WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ 1563 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
1596 WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */ 1564 *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */
1597 WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */ 1565 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */
1598 WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ 1566 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
1599 WRITE32(args->bc_attrs.max_ops); /* max operations */ 1567 *p++ = cpu_to_be32(args->bc_attrs.max_ops); /* max operations */
1600 WRITE32(args->bc_attrs.max_reqs); /* max requests */ 1568 *p++ = cpu_to_be32(args->bc_attrs.max_reqs); /* max requests */
1601 WRITE32(0); /* rdmachannel_attrs */ 1569 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
1602 1570
1603 RESERVE_SPACE(4); 1571 *p++ = cpu_to_be32(args->cb_program); /* cb_program */
1604 WRITE32(args->cb_program); /* cb_program */ 1572 *p++ = cpu_to_be32(1);
1605 1573 *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
1606 RESERVE_SPACE(4); /* # of security flavors */
1607 WRITE32(1);
1608
1609 RESERVE_SPACE(4);
1610 WRITE32(RPC_AUTH_UNIX); /* auth_sys */
1611 1574
1612 /* authsys_parms rfc1831 */ 1575 /* authsys_parms rfc1831 */
1613 RESERVE_SPACE(4); 1576 *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
1614 WRITE32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ 1577 p = xdr_encode_opaque(p, machine_name, len);
1615 len = scnprintf(machine_name, sizeof(machine_name), "%s", 1578 *p++ = cpu_to_be32(0); /* UID */
1616 clp->cl_ipaddr); 1579 *p++ = cpu_to_be32(0); /* GID */
1617 RESERVE_SPACE(16 + len); 1580 *p = cpu_to_be32(0); /* No more gids */
1618 WRITE32(len);
1619 WRITEMEM(machine_name, len);
1620 WRITE32(0); /* UID */
1621 WRITE32(0); /* GID */
1622 WRITE32(0); /* No more gids */
1623 hdr->nops++; 1581 hdr->nops++;
1624 hdr->replen += decode_create_session_maxsz; 1582 hdr->replen += decode_create_session_maxsz;
1625} 1583}
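
encode_create_session() collapses what used to be half a dozen interleaved RESERVE_SPACE() calls into a single reservation made once the machine name length is known. A sketch of how the 20 + 2*28 + 20 + len + 12 figure adds up; the helper name is illustrative and the grouping reflects a plausible reading of the hunk, not anything stated in the patch:

/* Byte accounting for the single reserve_space() in encode_create_session(). */
static unsigned int create_session_encode_len(unsigned int name_len)
{
        return 4 + 8 + 4 + 4 +          /* opcode, clientid, seqid, flags          */
               2 * 7 * 4 +              /* fore + back channel_attrs, 7 words each */
               4 + 4 + 4 +              /* cb_program, flavor count, RPC_AUTH_UNIX */
               4 + 4 + name_len +       /* authsys stamp, name length, name bytes  */
               4 + 4 + 4;               /* uid, gid, "no more gids"                */
}
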
@@ -1629,9 +1587,9 @@ static void encode_destroy_session(struct xdr_stream *xdr,
1629 struct compound_hdr *hdr) 1587 struct compound_hdr *hdr)
1630{ 1588{
1631 __be32 *p; 1589 __be32 *p;
1632 RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN); 1590 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN);
1633 WRITE32(OP_DESTROY_SESSION); 1591 *p++ = cpu_to_be32(OP_DESTROY_SESSION);
1634 WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1592 xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1635 hdr->nops++; 1593 hdr->nops++;
1636 hdr->replen += decode_destroy_session_maxsz; 1594 hdr->replen += decode_destroy_session_maxsz;
1637} 1595}
@@ -1655,8 +1613,8 @@ static void encode_sequence(struct xdr_stream *xdr,
1655 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); 1613 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
1656 slot = tp->slots + args->sa_slotid; 1614 slot = tp->slots + args->sa_slotid;
1657 1615
1658 RESERVE_SPACE(4); 1616 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16);
1659 WRITE32(OP_SEQUENCE); 1617 *p++ = cpu_to_be32(OP_SEQUENCE);
1660 1618
1661 /* 1619 /*
1662 * Sessionid + seqid + slotid + max slotid + cache_this 1620 * Sessionid + seqid + slotid + max slotid + cache_this
@@ -1670,12 +1628,11 @@ static void encode_sequence(struct xdr_stream *xdr,
1670 ((u32 *)session->sess_id.data)[3], 1628 ((u32 *)session->sess_id.data)[3],
1671 slot->seq_nr, args->sa_slotid, 1629 slot->seq_nr, args->sa_slotid,
1672 tp->highest_used_slotid, args->sa_cache_this); 1630 tp->highest_used_slotid, args->sa_cache_this);
1673 RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16); 1631 p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1674 WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1632 *p++ = cpu_to_be32(slot->seq_nr);
1675 WRITE32(slot->seq_nr); 1633 *p++ = cpu_to_be32(args->sa_slotid);
1676 WRITE32(args->sa_slotid); 1634 *p++ = cpu_to_be32(tp->highest_used_slotid);
1677 WRITE32(tp->highest_used_slotid); 1635 *p = cpu_to_be32(args->sa_cache_this);
1678 WRITE32(args->sa_cache_this);
1679 hdr->nops++; 1636 hdr->nops++;
1680 hdr->replen += decode_sequence_maxsz; 1637 hdr->replen += decode_sequence_maxsz;
1681#endif /* CONFIG_NFS_V4_1 */ 1638#endif /* CONFIG_NFS_V4_1 */
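
encode_sequence() follows the same consolidation: the opcode and the payload are reserved together. Assuming NFS4_MAX_SESSIONID_LEN is 16, the reservation works out as below (worked arithmetic only, mirroring the comment already present in the function):

/* 4 (OP_SEQUENCE) + 16 (sessionid) + 4 * 4 (seqid, slotid,
 * highest_used_slotid, cache_this) = 36 bytes reserved up front. */
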
@@ -2466,68 +2423,53 @@ static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
2466} 2423}
2467#endif /* CONFIG_NFS_V4_1 */ 2424#endif /* CONFIG_NFS_V4_1 */
2468 2425
2469/* 2426static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
2470 * START OF "GENERIC" DECODE ROUTINES. 2427{
2471 * These may look a little ugly since they are imported from a "generic" 2428 dprintk("nfs: %s: prematurely hit end of receive buffer. "
2472 * set of XDR encode/decode routines which are intended to be shared by 2429 "Remaining buffer length is %tu words.\n",
2473 * all of our NFSv4 implementations (OpenBSD, MacOS X...). 2430 func, xdr->end - xdr->p);
2474 * 2431}
2475 * If the pain of reading these is too great, it should be a straightforward
2476 * task to translate them into Linux-specific versions which are more
2477 * consistent with the style used in NFSv2/v3...
2478 */
2479#define READ32(x) (x) = ntohl(*p++)
2480#define READ64(x) do { \
2481 (x) = (u64)ntohl(*p++) << 32; \
2482 (x) |= ntohl(*p++); \
2483} while (0)
2484#define READTIME(x) do { \
2485 p++; \
2486 (x.tv_sec) = ntohl(*p++); \
2487 (x.tv_nsec) = ntohl(*p++); \
2488} while (0)
2489#define COPYMEM(x,nbytes) do { \
2490 memcpy((x), p, nbytes); \
2491 p += XDR_QUADLEN(nbytes); \
2492} while (0)
2493
2494#define READ_BUF(nbytes) do { \
2495 p = xdr_inline_decode(xdr, nbytes); \
2496 if (unlikely(!p)) { \
2497 dprintk("nfs: %s: prematurely hit end of receive" \
2498 " buffer\n", __func__); \
2499 dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
2500 __func__, xdr->p, nbytes, xdr->end); \
2501 return -EIO; \
2502 } \
2503} while (0)
2504 2432
2505static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) 2433static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
2506{ 2434{
2507 __be32 *p; 2435 __be32 *p;
2508 2436
2509 READ_BUF(4); 2437 p = xdr_inline_decode(xdr, 4);
2510 READ32(*len); 2438 if (unlikely(!p))
2511 READ_BUF(*len); 2439 goto out_overflow;
2440 *len = be32_to_cpup(p);
2441 p = xdr_inline_decode(xdr, *len);
2442 if (unlikely(!p))
2443 goto out_overflow;
2512 *string = (char *)p; 2444 *string = (char *)p;
2513 return 0; 2445 return 0;
2446out_overflow:
2447 print_overflow_msg(__func__, xdr);
2448 return -EIO;
2514} 2449}
2515 2450
2516static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) 2451static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
2517{ 2452{
2518 __be32 *p; 2453 __be32 *p;
2519 2454
2520 READ_BUF(8); 2455 p = xdr_inline_decode(xdr, 8);
2521 READ32(hdr->status); 2456 if (unlikely(!p))
2522 READ32(hdr->taglen); 2457 goto out_overflow;
2458 hdr->status = be32_to_cpup(p++);
2459 hdr->taglen = be32_to_cpup(p);
2523 2460
2524 READ_BUF(hdr->taglen + 4); 2461 p = xdr_inline_decode(xdr, hdr->taglen + 4);
2462 if (unlikely(!p))
2463 goto out_overflow;
2525 hdr->tag = (char *)p; 2464 hdr->tag = (char *)p;
2526 p += XDR_QUADLEN(hdr->taglen); 2465 p += XDR_QUADLEN(hdr->taglen);
2527 READ32(hdr->nops); 2466 hdr->nops = be32_to_cpup(p);
2528 if (unlikely(hdr->nops < 1)) 2467 if (unlikely(hdr->nops < 1))
2529 return nfs4_stat_to_errno(hdr->status); 2468 return nfs4_stat_to_errno(hdr->status);
2530 return 0; 2469 return 0;
2470out_overflow:
2471 print_overflow_msg(__func__, xdr);
2472 return -EIO;
2531} 2473}
2532 2474
2533static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) 2475static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
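
From here on the patch turns to the decode side. The READ_BUF()/READ32()/READ64()/READTIME()/COPYMEM() macros, which hid both the pointer bookkeeping and an early return -EIO, are replaced by explicit xdr_inline_decode() calls with a NULL check, be32_to_cpup()/xdr_decode_hyper() reads, and a shared out_overflow label that reports through the new print_overflow_msg() helper. Every decode_attr_*() hunk below repeats this shape; a minimal standalone sketch, with decode_example() and its 12-byte layout purely illustrative:

static int decode_example(struct xdr_stream *xdr, uint32_t *count, uint64_t *cookie)
{
        __be32 *p;

        p = xdr_inline_decode(xdr, 12);         /* 4-byte count + 8-byte cookie */
        if (unlikely(!p))
                goto out_overflow;
        *count = be32_to_cpup(p++);
        xdr_decode_hyper(p, cookie);
        return 0;
out_overflow:
        print_overflow_msg(__func__, xdr);
        return -EIO;
}
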
@@ -2536,18 +2478,23 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
2536 uint32_t opnum; 2478 uint32_t opnum;
2537 int32_t nfserr; 2479 int32_t nfserr;
2538 2480
2539 READ_BUF(8); 2481 p = xdr_inline_decode(xdr, 8);
2540 READ32(opnum); 2482 if (unlikely(!p))
2483 goto out_overflow;
2484 opnum = be32_to_cpup(p++);
2541 if (opnum != expected) { 2485 if (opnum != expected) {
2542 dprintk("nfs: Server returned operation" 2486 dprintk("nfs: Server returned operation"
2543 " %d but we issued a request for %d\n", 2487 " %d but we issued a request for %d\n",
2544 opnum, expected); 2488 opnum, expected);
2545 return -EIO; 2489 return -EIO;
2546 } 2490 }
2547 READ32(nfserr); 2491 nfserr = be32_to_cpup(p);
2548 if (nfserr != NFS_OK) 2492 if (nfserr != NFS_OK)
2549 return nfs4_stat_to_errno(nfserr); 2493 return nfs4_stat_to_errno(nfserr);
2550 return 0; 2494 return 0;
2495out_overflow:
2496 print_overflow_msg(__func__, xdr);
2497 return -EIO;
2551} 2498}
2552 2499
2553/* Dummy routine */ 2500/* Dummy routine */
@@ -2557,8 +2504,11 @@ static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
2557 unsigned int strlen; 2504 unsigned int strlen;
2558 char *str; 2505 char *str;
2559 2506
2560 READ_BUF(12); 2507 p = xdr_inline_decode(xdr, 12);
2561 return decode_opaque_inline(xdr, &strlen, &str); 2508 if (likely(p))
2509 return decode_opaque_inline(xdr, &strlen, &str);
2510 print_overflow_msg(__func__, xdr);
2511 return -EIO;
2562} 2512}
2563 2513
2564static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) 2514static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
@@ -2566,27 +2516,39 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
2566 uint32_t bmlen; 2516 uint32_t bmlen;
2567 __be32 *p; 2517 __be32 *p;
2568 2518
2569 READ_BUF(4); 2519 p = xdr_inline_decode(xdr, 4);
2570 READ32(bmlen); 2520 if (unlikely(!p))
2521 goto out_overflow;
2522 bmlen = be32_to_cpup(p);
2571 2523
2572 bitmap[0] = bitmap[1] = 0; 2524 bitmap[0] = bitmap[1] = 0;
2573 READ_BUF((bmlen << 2)); 2525 p = xdr_inline_decode(xdr, (bmlen << 2));
2526 if (unlikely(!p))
2527 goto out_overflow;
2574 if (bmlen > 0) { 2528 if (bmlen > 0) {
2575 READ32(bitmap[0]); 2529 bitmap[0] = be32_to_cpup(p++);
2576 if (bmlen > 1) 2530 if (bmlen > 1)
2577 READ32(bitmap[1]); 2531 bitmap[1] = be32_to_cpup(p);
2578 } 2532 }
2579 return 0; 2533 return 0;
2534out_overflow:
2535 print_overflow_msg(__func__, xdr);
2536 return -EIO;
2580} 2537}
2581 2538
2582static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep) 2539static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep)
2583{ 2540{
2584 __be32 *p; 2541 __be32 *p;
2585 2542
2586 READ_BUF(4); 2543 p = xdr_inline_decode(xdr, 4);
2587 READ32(*attrlen); 2544 if (unlikely(!p))
2545 goto out_overflow;
2546 *attrlen = be32_to_cpup(p);
2588 *savep = xdr->p; 2547 *savep = xdr->p;
2589 return 0; 2548 return 0;
2549out_overflow:
2550 print_overflow_msg(__func__, xdr);
2551 return -EIO;
2590} 2552}
2591 2553
2592static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) 2554static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
@@ -2609,8 +2571,10 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
2609 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) 2571 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
2610 return -EIO; 2572 return -EIO;
2611 if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) { 2573 if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) {
2612 READ_BUF(4); 2574 p = xdr_inline_decode(xdr, 4);
2613 READ32(*type); 2575 if (unlikely(!p))
2576 goto out_overflow;
2577 *type = be32_to_cpup(p);
2614 if (*type < NF4REG || *type > NF4NAMEDATTR) { 2578 if (*type < NF4REG || *type > NF4NAMEDATTR) {
2615 dprintk("%s: bad type %d\n", __func__, *type); 2579 dprintk("%s: bad type %d\n", __func__, *type);
2616 return -EIO; 2580 return -EIO;
@@ -2620,6 +2584,9 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
2620 } 2584 }
2621 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]); 2585 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
2622 return ret; 2586 return ret;
2587out_overflow:
2588 print_overflow_msg(__func__, xdr);
2589 return -EIO;
2623} 2590}
2624 2591
2625static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) 2592static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
@@ -2631,14 +2598,19 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2631 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) 2598 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
2632 return -EIO; 2599 return -EIO;
2633 if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) { 2600 if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) {
2634 READ_BUF(8); 2601 p = xdr_inline_decode(xdr, 8);
2635 READ64(*change); 2602 if (unlikely(!p))
2603 goto out_overflow;
2604 xdr_decode_hyper(p, change);
2636 bitmap[0] &= ~FATTR4_WORD0_CHANGE; 2605 bitmap[0] &= ~FATTR4_WORD0_CHANGE;
2637 ret = NFS_ATTR_FATTR_CHANGE; 2606 ret = NFS_ATTR_FATTR_CHANGE;
2638 } 2607 }
2639 dprintk("%s: change attribute=%Lu\n", __func__, 2608 dprintk("%s: change attribute=%Lu\n", __func__,
2640 (unsigned long long)*change); 2609 (unsigned long long)*change);
2641 return ret; 2610 return ret;
2611out_overflow:
2612 print_overflow_msg(__func__, xdr);
2613 return -EIO;
2642} 2614}
2643 2615
2644static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) 2616static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
@@ -2650,13 +2622,18 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
2650 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) 2622 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
2651 return -EIO; 2623 return -EIO;
2652 if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) { 2624 if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) {
2653 READ_BUF(8); 2625 p = xdr_inline_decode(xdr, 8);
2654 READ64(*size); 2626 if (unlikely(!p))
2627 goto out_overflow;
2628 xdr_decode_hyper(p, size);
2655 bitmap[0] &= ~FATTR4_WORD0_SIZE; 2629 bitmap[0] &= ~FATTR4_WORD0_SIZE;
2656 ret = NFS_ATTR_FATTR_SIZE; 2630 ret = NFS_ATTR_FATTR_SIZE;
2657 } 2631 }
2658 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); 2632 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
2659 return ret; 2633 return ret;
2634out_overflow:
2635 print_overflow_msg(__func__, xdr);
2636 return -EIO;
2660} 2637}
2661 2638
2662static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2639static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2667,12 +2644,17 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
2667 if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U))) 2644 if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)))
2668 return -EIO; 2645 return -EIO;
2669 if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) { 2646 if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
2670 READ_BUF(4); 2647 p = xdr_inline_decode(xdr, 4);
2671 READ32(*res); 2648 if (unlikely(!p))
2649 goto out_overflow;
2650 *res = be32_to_cpup(p);
2672 bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; 2651 bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
2673 } 2652 }
2674 dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true"); 2653 dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
2675 return 0; 2654 return 0;
2655out_overflow:
2656 print_overflow_msg(__func__, xdr);
2657 return -EIO;
2676} 2658}
2677 2659
2678static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2660static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2683,12 +2665,17 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
2683 if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U))) 2665 if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)))
2684 return -EIO; 2666 return -EIO;
2685 if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) { 2667 if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
2686 READ_BUF(4); 2668 p = xdr_inline_decode(xdr, 4);
2687 READ32(*res); 2669 if (unlikely(!p))
2670 goto out_overflow;
2671 *res = be32_to_cpup(p);
2688 bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; 2672 bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
2689 } 2673 }
2690 dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); 2674 dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
2691 return 0; 2675 return 0;
2676out_overflow:
2677 print_overflow_msg(__func__, xdr);
2678 return -EIO;
2692} 2679}
2693 2680
2694static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) 2681static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
@@ -2701,9 +2688,11 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
2701 if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U))) 2688 if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U)))
2702 return -EIO; 2689 return -EIO;
2703 if (likely(bitmap[0] & FATTR4_WORD0_FSID)) { 2690 if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
2704 READ_BUF(16); 2691 p = xdr_inline_decode(xdr, 16);
2705 READ64(fsid->major); 2692 if (unlikely(!p))
2706 READ64(fsid->minor); 2693 goto out_overflow;
2694 p = xdr_decode_hyper(p, &fsid->major);
2695 xdr_decode_hyper(p, &fsid->minor);
2707 bitmap[0] &= ~FATTR4_WORD0_FSID; 2696 bitmap[0] &= ~FATTR4_WORD0_FSID;
2708 ret = NFS_ATTR_FATTR_FSID; 2697 ret = NFS_ATTR_FATTR_FSID;
2709 } 2698 }
@@ -2711,6 +2700,9 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
2711 (unsigned long long)fsid->major, 2700 (unsigned long long)fsid->major,
2712 (unsigned long long)fsid->minor); 2701 (unsigned long long)fsid->minor);
2713 return ret; 2702 return ret;
2703out_overflow:
2704 print_overflow_msg(__func__, xdr);
2705 return -EIO;
2714} 2706}
2715 2707
2716static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2708static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2721,12 +2713,17 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint
2721 if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U))) 2713 if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
2722 return -EIO; 2714 return -EIO;
2723 if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) { 2715 if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
2724 READ_BUF(4); 2716 p = xdr_inline_decode(xdr, 4);
2725 READ32(*res); 2717 if (unlikely(!p))
2718 goto out_overflow;
2719 *res = be32_to_cpup(p);
2726 bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; 2720 bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
2727 } 2721 }
2728 dprintk("%s: file size=%u\n", __func__, (unsigned int)*res); 2722 dprintk("%s: file size=%u\n", __func__, (unsigned int)*res);
2729 return 0; 2723 return 0;
2724out_overflow:
2725 print_overflow_msg(__func__, xdr);
2726 return -EIO;
2730} 2727}
2731 2728
2732static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2729static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2737,12 +2734,17 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
2737 if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U))) 2734 if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
2738 return -EIO; 2735 return -EIO;
2739 if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) { 2736 if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
2740 READ_BUF(4); 2737 p = xdr_inline_decode(xdr, 4);
2741 READ32(*res); 2738 if (unlikely(!p))
2739 goto out_overflow;
2740 *res = be32_to_cpup(p);
2742 bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT; 2741 bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
2743 } 2742 }
2744 dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res); 2743 dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res);
2745 return 0; 2744 return 0;
2745out_overflow:
2746 print_overflow_msg(__func__, xdr);
2747 return -EIO;
2746} 2748}
2747 2749
2748static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2750static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2754,13 +2756,18 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2754 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) 2756 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
2755 return -EIO; 2757 return -EIO;
2756 if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) { 2758 if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) {
2757 READ_BUF(8); 2759 p = xdr_inline_decode(xdr, 8);
2758 READ64(*fileid); 2760 if (unlikely(!p))
2761 goto out_overflow;
2762 xdr_decode_hyper(p, fileid);
2759 bitmap[0] &= ~FATTR4_WORD0_FILEID; 2763 bitmap[0] &= ~FATTR4_WORD0_FILEID;
2760 ret = NFS_ATTR_FATTR_FILEID; 2764 ret = NFS_ATTR_FATTR_FILEID;
2761 } 2765 }
2762 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2766 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2763 return ret; 2767 return ret;
2768out_overflow:
2769 print_overflow_msg(__func__, xdr);
2770 return -EIO;
2764} 2771}
2765 2772
2766static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2773static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2772,13 +2779,18 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
2772 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) 2779 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
2773 return -EIO; 2780 return -EIO;
2774 if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) { 2781 if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) {
2775 READ_BUF(8); 2782 p = xdr_inline_decode(xdr, 8);
2776 READ64(*fileid); 2783 if (unlikely(!p))
2784 goto out_overflow;
2785 xdr_decode_hyper(p, fileid);
2777 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 2786 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
2778 ret = NFS_ATTR_FATTR_FILEID; 2787 ret = NFS_ATTR_FATTR_FILEID;
2779 } 2788 }
2780 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2789 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2781 return ret; 2790 return ret;
2791out_overflow:
2792 print_overflow_msg(__func__, xdr);
2793 return -EIO;
2782} 2794}
2783 2795
2784static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2796static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2790,12 +2802,17 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
2790 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U))) 2802 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U)))
2791 return -EIO; 2803 return -EIO;
2792 if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) { 2804 if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) {
2793 READ_BUF(8); 2805 p = xdr_inline_decode(xdr, 8);
2794 READ64(*res); 2806 if (unlikely(!p))
2807 goto out_overflow;
2808 xdr_decode_hyper(p, res);
2795 bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL; 2809 bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
2796 } 2810 }
2797 dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res); 2811 dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res);
2798 return status; 2812 return status;
2813out_overflow:
2814 print_overflow_msg(__func__, xdr);
2815 return -EIO;
2799} 2816}
2800 2817
2801static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2818static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2807,12 +2824,17 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
2807 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U))) 2824 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U)))
2808 return -EIO; 2825 return -EIO;
2809 if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) { 2826 if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) {
2810 READ_BUF(8); 2827 p = xdr_inline_decode(xdr, 8);
2811 READ64(*res); 2828 if (unlikely(!p))
2829 goto out_overflow;
2830 xdr_decode_hyper(p, res);
2812 bitmap[0] &= ~FATTR4_WORD0_FILES_FREE; 2831 bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
2813 } 2832 }
2814 dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res); 2833 dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res);
2815 return status; 2834 return status;
2835out_overflow:
2836 print_overflow_msg(__func__, xdr);
2837 return -EIO;
2816} 2838}
2817 2839
2818static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2840static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2824,12 +2846,17 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
2824 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U))) 2846 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U)))
2825 return -EIO; 2847 return -EIO;
2826 if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) { 2848 if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) {
2827 READ_BUF(8); 2849 p = xdr_inline_decode(xdr, 8);
2828 READ64(*res); 2850 if (unlikely(!p))
2851 goto out_overflow;
2852 xdr_decode_hyper(p, res);
2829 bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL; 2853 bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
2830 } 2854 }
2831 dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res); 2855 dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res);
2832 return status; 2856 return status;
2857out_overflow:
2858 print_overflow_msg(__func__, xdr);
2859 return -EIO;
2833} 2860}
2834 2861
2835static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) 2862static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
@@ -2838,8 +2865,10 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
2838 __be32 *p; 2865 __be32 *p;
2839 int status = 0; 2866 int status = 0;
2840 2867
2841 READ_BUF(4); 2868 p = xdr_inline_decode(xdr, 4);
2842 READ32(n); 2869 if (unlikely(!p))
2870 goto out_overflow;
2871 n = be32_to_cpup(p);
2843 if (n == 0) 2872 if (n == 0)
2844 goto root_path; 2873 goto root_path;
2845 dprintk("path "); 2874 dprintk("path ");
@@ -2873,6 +2902,9 @@ out_eio:
2873 dprintk(" status %d", status); 2902 dprintk(" status %d", status);
2874 status = -EIO; 2903 status = -EIO;
2875 goto out; 2904 goto out;
2905out_overflow:
2906 print_overflow_msg(__func__, xdr);
2907 return -EIO;
2876} 2908}
2877 2909
2878static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res) 2910static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
@@ -2890,8 +2922,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2890 status = decode_pathname(xdr, &res->fs_path); 2922 status = decode_pathname(xdr, &res->fs_path);
2891 if (unlikely(status != 0)) 2923 if (unlikely(status != 0))
2892 goto out; 2924 goto out;
2893 READ_BUF(4); 2925 p = xdr_inline_decode(xdr, 4);
2894 READ32(n); 2926 if (unlikely(!p))
2927 goto out_overflow;
2928 n = be32_to_cpup(p);
2895 if (n <= 0) 2929 if (n <= 0)
2896 goto out_eio; 2930 goto out_eio;
2897 res->nlocations = 0; 2931 res->nlocations = 0;
@@ -2899,8 +2933,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2899 u32 m; 2933 u32 m;
2900 struct nfs4_fs_location *loc = &res->locations[res->nlocations]; 2934 struct nfs4_fs_location *loc = &res->locations[res->nlocations];
2901 2935
2902 READ_BUF(4); 2936 p = xdr_inline_decode(xdr, 4);
2903 READ32(m); 2937 if (unlikely(!p))
2938 goto out_overflow;
2939 m = be32_to_cpup(p);
2904 2940
2905 loc->nservers = 0; 2941 loc->nservers = 0;
2906 dprintk("%s: servers ", __func__); 2942 dprintk("%s: servers ", __func__);
@@ -2939,6 +2975,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2939out: 2975out:
2940 dprintk("%s: fs_locations done, error = %d\n", __func__, status); 2976 dprintk("%s: fs_locations done, error = %d\n", __func__, status);
2941 return status; 2977 return status;
2978out_overflow:
2979 print_overflow_msg(__func__, xdr);
2942out_eio: 2980out_eio:
2943 status = -EIO; 2981 status = -EIO;
2944 goto out; 2982 goto out;
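
Unlike the decoders shown above, decode_attr_fs_locations() does not return -EIO straight from its out_overflow label: after printing the overflow message it falls through into the pre-existing out_eio label, so the error still takes the common exit and the "fs_locations done, error = %d" trace is printed. In outline, paraphrasing the hunk above:

out_overflow:
        print_overflow_msg(__func__, xdr);
        /* fall through */
out_eio:
        status = -EIO;
        goto out;       /* common exit, prints "fs_locations done, error = %d" */
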
@@ -2953,12 +2991,17 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin
2953 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U))) 2991 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U)))
2954 return -EIO; 2992 return -EIO;
2955 if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) { 2993 if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) {
2956 READ_BUF(8); 2994 p = xdr_inline_decode(xdr, 8);
2957 READ64(*res); 2995 if (unlikely(!p))
2996 goto out_overflow;
2997 xdr_decode_hyper(p, res);
2958 bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE; 2998 bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
2959 } 2999 }
2960 dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res); 3000 dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res);
2961 return status; 3001 return status;
3002out_overflow:
3003 print_overflow_msg(__func__, xdr);
3004 return -EIO;
2962} 3005}
2963 3006
2964static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink) 3007static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
@@ -2970,12 +3013,17 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
2970 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U))) 3013 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U)))
2971 return -EIO; 3014 return -EIO;
2972 if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) { 3015 if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) {
2973 READ_BUF(4); 3016 p = xdr_inline_decode(xdr, 4);
2974 READ32(*maxlink); 3017 if (unlikely(!p))
3018 goto out_overflow;
3019 *maxlink = be32_to_cpup(p);
2975 bitmap[0] &= ~FATTR4_WORD0_MAXLINK; 3020 bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
2976 } 3021 }
2977 dprintk("%s: maxlink=%u\n", __func__, *maxlink); 3022 dprintk("%s: maxlink=%u\n", __func__, *maxlink);
2978 return status; 3023 return status;
3024out_overflow:
3025 print_overflow_msg(__func__, xdr);
3026 return -EIO;
2979} 3027}
2980 3028
2981static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname) 3029static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
@@ -2987,12 +3035,17 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
2987 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U))) 3035 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U)))
2988 return -EIO; 3036 return -EIO;
2989 if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) { 3037 if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) {
2990 READ_BUF(4); 3038 p = xdr_inline_decode(xdr, 4);
2991 READ32(*maxname); 3039 if (unlikely(!p))
3040 goto out_overflow;
3041 *maxname = be32_to_cpup(p);
2992 bitmap[0] &= ~FATTR4_WORD0_MAXNAME; 3042 bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
2993 } 3043 }
2994 dprintk("%s: maxname=%u\n", __func__, *maxname); 3044 dprintk("%s: maxname=%u\n", __func__, *maxname);
2995 return status; 3045 return status;
3046out_overflow:
3047 print_overflow_msg(__func__, xdr);
3048 return -EIO;
2996} 3049}
2997 3050
2998static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3051static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3005,8 +3058,10 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
3005 return -EIO; 3058 return -EIO;
3006 if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) { 3059 if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) {
3007 uint64_t maxread; 3060 uint64_t maxread;
3008 READ_BUF(8); 3061 p = xdr_inline_decode(xdr, 8);
3009 READ64(maxread); 3062 if (unlikely(!p))
3063 goto out_overflow;
3064 xdr_decode_hyper(p, &maxread);
3010 if (maxread > 0x7FFFFFFF) 3065 if (maxread > 0x7FFFFFFF)
3011 maxread = 0x7FFFFFFF; 3066 maxread = 0x7FFFFFFF;
3012 *res = (uint32_t)maxread; 3067 *res = (uint32_t)maxread;
@@ -3014,6 +3069,9 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
3014 } 3069 }
3015 dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res); 3070 dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res);
3016 return status; 3071 return status;
3072out_overflow:
3073 print_overflow_msg(__func__, xdr);
3074 return -EIO;
3017} 3075}
3018 3076
3019static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3077static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3026,8 +3084,10 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
3026 return -EIO; 3084 return -EIO;
3027 if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) { 3085 if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) {
3028 uint64_t maxwrite; 3086 uint64_t maxwrite;
3029 READ_BUF(8); 3087 p = xdr_inline_decode(xdr, 8);
3030 READ64(maxwrite); 3088 if (unlikely(!p))
3089 goto out_overflow;
3090 xdr_decode_hyper(p, &maxwrite);
3031 if (maxwrite > 0x7FFFFFFF) 3091 if (maxwrite > 0x7FFFFFFF)
3032 maxwrite = 0x7FFFFFFF; 3092 maxwrite = 0x7FFFFFFF;
3033 *res = (uint32_t)maxwrite; 3093 *res = (uint32_t)maxwrite;
@@ -3035,6 +3095,9 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
3035 } 3095 }
3036 dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res); 3096 dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res);
3037 return status; 3097 return status;
3098out_overflow:
3099 print_overflow_msg(__func__, xdr);
3100 return -EIO;
3038} 3101}
3039 3102
3040static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode) 3103static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
@@ -3047,14 +3110,19 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m
3047 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) 3110 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
3048 return -EIO; 3111 return -EIO;
3049 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { 3112 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
3050 READ_BUF(4); 3113 p = xdr_inline_decode(xdr, 4);
3051 READ32(tmp); 3114 if (unlikely(!p))
3115 goto out_overflow;
3116 tmp = be32_to_cpup(p);
3052 *mode = tmp & ~S_IFMT; 3117 *mode = tmp & ~S_IFMT;
3053 bitmap[1] &= ~FATTR4_WORD1_MODE; 3118 bitmap[1] &= ~FATTR4_WORD1_MODE;
3054 ret = NFS_ATTR_FATTR_MODE; 3119 ret = NFS_ATTR_FATTR_MODE;
3055 } 3120 }
3056 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); 3121 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
3057 return ret; 3122 return ret;
3123out_overflow:
3124 print_overflow_msg(__func__, xdr);
3125 return -EIO;
3058} 3126}
3059 3127
3060static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) 3128static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
@@ -3066,16 +3134,22 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
3066 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) 3134 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
3067 return -EIO; 3135 return -EIO;
3068 if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) { 3136 if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) {
3069 READ_BUF(4); 3137 p = xdr_inline_decode(xdr, 4);
3070 READ32(*nlink); 3138 if (unlikely(!p))
3139 goto out_overflow;
3140 *nlink = be32_to_cpup(p);
3071 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; 3141 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
3072 ret = NFS_ATTR_FATTR_NLINK; 3142 ret = NFS_ATTR_FATTR_NLINK;
3073 } 3143 }
3074 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); 3144 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
3075 return ret; 3145 return ret;
3146out_overflow:
3147 print_overflow_msg(__func__, xdr);
3148 return -EIO;
3076} 3149}
3077 3150
3078static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid) 3151static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3152 struct nfs_client *clp, uint32_t *uid, int may_sleep)
3079{ 3153{
3080 uint32_t len; 3154 uint32_t len;
3081 __be32 *p; 3155 __be32 *p;
@@ -3085,10 +3159,16 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3085 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) 3159 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
3086 return -EIO; 3160 return -EIO;
3087 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { 3161 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
3088 READ_BUF(4); 3162 p = xdr_inline_decode(xdr, 4);
3089 READ32(len); 3163 if (unlikely(!p))
3090 READ_BUF(len); 3164 goto out_overflow;
3091 if (len < XDR_MAX_NETOBJ) { 3165 len = be32_to_cpup(p);
3166 p = xdr_inline_decode(xdr, len);
3167 if (unlikely(!p))
3168 goto out_overflow;
3169 if (!may_sleep) {
3170 /* do nothing */
3171 } else if (len < XDR_MAX_NETOBJ) {
3092 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) 3172 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0)
3093 ret = NFS_ATTR_FATTR_OWNER; 3173 ret = NFS_ATTR_FATTR_OWNER;
3094 else 3174 else
@@ -3101,9 +3181,13 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3101 } 3181 }
3102 dprintk("%s: uid=%d\n", __func__, (int)*uid); 3182 dprintk("%s: uid=%d\n", __func__, (int)*uid);
3103 return ret; 3183 return ret;
3184out_overflow:
3185 print_overflow_msg(__func__, xdr);
3186 return -EIO;
3104} 3187}
3105 3188
3106static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid) 3189static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3190 struct nfs_client *clp, uint32_t *gid, int may_sleep)
3107{ 3191{
3108 uint32_t len; 3192 uint32_t len;
3109 __be32 *p; 3193 __be32 *p;
@@ -3113,10 +3197,16 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3113 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) 3197 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
3114 return -EIO; 3198 return -EIO;
3115 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { 3199 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
3116 READ_BUF(4); 3200 p = xdr_inline_decode(xdr, 4);
3117 READ32(len); 3201 if (unlikely(!p))
3118 READ_BUF(len); 3202 goto out_overflow;
3119 if (len < XDR_MAX_NETOBJ) { 3203 len = be32_to_cpup(p);
3204 p = xdr_inline_decode(xdr, len);
3205 if (unlikely(!p))
3206 goto out_overflow;
3207 if (!may_sleep) {
3208 /* do nothing */
3209 } else if (len < XDR_MAX_NETOBJ) {
3120 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) 3210 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0)
3121 ret = NFS_ATTR_FATTR_GROUP; 3211 ret = NFS_ATTR_FATTR_GROUP;
3122 else 3212 else
@@ -3129,6 +3219,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
3129 } 3219 }
3130 dprintk("%s: gid=%d\n", __func__, (int)*gid); 3220 dprintk("%s: gid=%d\n", __func__, (int)*gid);
3131 return ret; 3221 return ret;
3222out_overflow:
3223 print_overflow_msg(__func__, xdr);
3224 return -EIO;
3132} 3225}
3133 3226
3134static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) 3227static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
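
Besides the macro conversion, decode_attr_owner() and decode_attr_group() gain a may_sleep argument: the owner/group string is always consumed from the stream, but when the caller cannot block the idmapper lookup (nfs_map_name_to_uid()/nfs_map_group_to_gid(), which may sleep) is skipped and the caller's default uid/gid is left in place. A sketch of how a caller might thread the flag through; the wrapper name is illustrative:

/* Pass may_sleep == 0 from contexts that must not block; the XDR stream is
 * advanced either way, only the uid/gid mapping is skipped. */
static int decode_owner_group(struct xdr_stream *xdr, uint32_t *bitmap,
                              struct nfs_client *clp, int may_sleep,
                              uint32_t *uid, uint32_t *gid)
{
        int status;

        status = decode_attr_owner(xdr, bitmap, clp, uid, may_sleep);
        if (status < 0)
                return status;
        status = decode_attr_group(xdr, bitmap, clp, gid, may_sleep);
        if (status < 0)
                return status;
        return 0;
}
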
@@ -3143,9 +3236,11 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
3143 if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) { 3236 if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) {
3144 dev_t tmp; 3237 dev_t tmp;
3145 3238
3146 READ_BUF(8); 3239 p = xdr_inline_decode(xdr, 8);
3147 READ32(major); 3240 if (unlikely(!p))
3148 READ32(minor); 3241 goto out_overflow;
3242 major = be32_to_cpup(p++);
3243 minor = be32_to_cpup(p);
3149 tmp = MKDEV(major, minor); 3244 tmp = MKDEV(major, minor);
3150 if (MAJOR(tmp) == major && MINOR(tmp) == minor) 3245 if (MAJOR(tmp) == major && MINOR(tmp) == minor)
3151 *rdev = tmp; 3246 *rdev = tmp;
@@ -3154,6 +3249,9 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
3154 } 3249 }
3155 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); 3250 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
3156 return ret; 3251 return ret;
3252out_overflow:
3253 print_overflow_msg(__func__, xdr);
3254 return -EIO;
3157} 3255}
3158 3256
3159static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3257static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3165,12 +3263,17 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
3165 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U))) 3263 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U)))
3166 return -EIO; 3264 return -EIO;
3167 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) { 3265 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) {
3168 READ_BUF(8); 3266 p = xdr_inline_decode(xdr, 8);
3169 READ64(*res); 3267 if (unlikely(!p))
3268 goto out_overflow;
3269 xdr_decode_hyper(p, res);
3170 bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL; 3270 bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
3171 } 3271 }
3172 dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res); 3272 dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res);
3173 return status; 3273 return status;
3274out_overflow:
3275 print_overflow_msg(__func__, xdr);
3276 return -EIO;
3174} 3277}
3175 3278
3176static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3279static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3182,12 +3285,17 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
3182 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U))) 3285 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U)))
3183 return -EIO; 3286 return -EIO;
3184 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) { 3287 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) {
3185 READ_BUF(8); 3288 p = xdr_inline_decode(xdr, 8);
3186 READ64(*res); 3289 if (unlikely(!p))
3290 goto out_overflow;
3291 xdr_decode_hyper(p, res);
3187 bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE; 3292 bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
3188 } 3293 }
3189 dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res); 3294 dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res);
3190 return status; 3295 return status;
3296out_overflow:
3297 print_overflow_msg(__func__, xdr);
3298 return -EIO;
3191} 3299}
3192 3300
3193static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3301static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3199,12 +3307,17 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
3199 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U))) 3307 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U)))
3200 return -EIO; 3308 return -EIO;
3201 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) { 3309 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) {
3202 READ_BUF(8); 3310 p = xdr_inline_decode(xdr, 8);
3203 READ64(*res); 3311 if (unlikely(!p))
3312 goto out_overflow;
3313 xdr_decode_hyper(p, res);
3204 bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL; 3314 bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
3205 } 3315 }
3206 dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res); 3316 dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res);
3207 return status; 3317 return status;
3318out_overflow:
3319 print_overflow_msg(__func__, xdr);
3320 return -EIO;
3208} 3321}
3209 3322
3210static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) 3323static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
@@ -3216,14 +3329,19 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
3216 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) 3329 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
3217 return -EIO; 3330 return -EIO;
3218 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) { 3331 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) {
3219 READ_BUF(8); 3332 p = xdr_inline_decode(xdr, 8);
3220 READ64(*used); 3333 if (unlikely(!p))
3334 goto out_overflow;
3335 xdr_decode_hyper(p, used);
3221 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; 3336 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
3222 ret = NFS_ATTR_FATTR_SPACE_USED; 3337 ret = NFS_ATTR_FATTR_SPACE_USED;
3223 } 3338 }
3224 dprintk("%s: space used=%Lu\n", __func__, 3339 dprintk("%s: space used=%Lu\n", __func__,
3225 (unsigned long long)*used); 3340 (unsigned long long)*used);
3226 return ret; 3341 return ret;
3342out_overflow:
3343 print_overflow_msg(__func__, xdr);
3344 return -EIO;
3227} 3345}
3228 3346
3229static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) 3347static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
@@ -3232,12 +3350,17 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
3232 uint64_t sec; 3350 uint64_t sec;
3233 uint32_t nsec; 3351 uint32_t nsec;
3234 3352
3235 READ_BUF(12); 3353 p = xdr_inline_decode(xdr, 12);
3236 READ64(sec); 3354 if (unlikely(!p))
3237 READ32(nsec); 3355 goto out_overflow;
3356 p = xdr_decode_hyper(p, &sec);
3357 nsec = be32_to_cpup(p);
3238 time->tv_sec = (time_t)sec; 3358 time->tv_sec = (time_t)sec;
3239 time->tv_nsec = (long)nsec; 3359 time->tv_nsec = (long)nsec;
3240 return 0; 3360 return 0;
3361out_overflow:
3362 print_overflow_msg(__func__, xdr);
3363 return -EIO;
3241} 3364}
3242 3365
3243static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 3366static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
@@ -3315,11 +3438,16 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c
3315{ 3438{
3316 __be32 *p; 3439 __be32 *p;
3317 3440
3318 READ_BUF(20); 3441 p = xdr_inline_decode(xdr, 20);
3319 READ32(cinfo->atomic); 3442 if (unlikely(!p))
3320 READ64(cinfo->before); 3443 goto out_overflow;
3321 READ64(cinfo->after); 3444 cinfo->atomic = be32_to_cpup(p++);
3445 p = xdr_decode_hyper(p, &cinfo->before);
3446 xdr_decode_hyper(p, &cinfo->after);
3322 return 0; 3447 return 0;
3448out_overflow:
3449 print_overflow_msg(__func__, xdr);
3450 return -EIO;
3323} 3451}
3324 3452
3325static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) 3453static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
@@ -3331,40 +3459,62 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
3331 status = decode_op_hdr(xdr, OP_ACCESS); 3459 status = decode_op_hdr(xdr, OP_ACCESS);
3332 if (status) 3460 if (status)
3333 return status; 3461 return status;
3334 READ_BUF(8); 3462 p = xdr_inline_decode(xdr, 8);
3335 READ32(supp); 3463 if (unlikely(!p))
3336 READ32(acc); 3464 goto out_overflow;
3465 supp = be32_to_cpup(p++);
3466 acc = be32_to_cpup(p);
3337 access->supported = supp; 3467 access->supported = supp;
3338 access->access = acc; 3468 access->access = acc;
3339 return 0; 3469 return 0;
3470out_overflow:
3471 print_overflow_msg(__func__, xdr);
3472 return -EIO;
3340} 3473}
3341 3474
3342static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) 3475static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
3343{ 3476{
3344 __be32 *p; 3477 __be32 *p;
3478
3479 p = xdr_inline_decode(xdr, len);
3480 if (likely(p)) {
3481 memcpy(buf, p, len);
3482 return 0;
3483 }
3484 print_overflow_msg(__func__, xdr);
3485 return -EIO;
3486}
3487
3488static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
3489{
3490 return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
3491}
3492
3493static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
3494{
3345 int status; 3495 int status;
3346 3496
3347 status = decode_op_hdr(xdr, OP_CLOSE); 3497 status = decode_op_hdr(xdr, OP_CLOSE);
3348 if (status != -EIO) 3498 if (status != -EIO)
3349 nfs_increment_open_seqid(status, res->seqid); 3499 nfs_increment_open_seqid(status, res->seqid);
3350 if (status) 3500 if (!status)
3351 return status; 3501 status = decode_stateid(xdr, &res->stateid);
3352 READ_BUF(NFS4_STATEID_SIZE); 3502 return status;
3353 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3503}
3354 return 0; 3504
3505static int decode_verifier(struct xdr_stream *xdr, void *verifier)
3506{
3507 return decode_opaque_fixed(xdr, verifier, 8);
3355} 3508}
3356 3509
3357static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) 3510static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
3358{ 3511{
3359 __be32 *p;
3360 int status; 3512 int status;
3361 3513
3362 status = decode_op_hdr(xdr, OP_COMMIT); 3514 status = decode_op_hdr(xdr, OP_COMMIT);
3363 if (status) 3515 if (!status)
3364 return status; 3516 status = decode_verifier(xdr, res->verf->verifier);
3365 READ_BUF(8); 3517 return status;
3366 COPYMEM(res->verf->verifier, 8);
3367 return 0;
3368} 3518}
3369 3519
3370static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 3520static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
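
decode_opaque_fixed() and its decode_stateid()/decode_verifier() wrappers above replace the former READ_BUF(n) + COPYMEM(buf, n) pairs for fixed-length fields. A hedged sketch of how an op decoder adopts them; decode_example_op and OP_EXAMPLE are hypothetical, the rest matches the helpers introduced in this hunk:

    /* Hypothetical op decoder built on the new fixed-length helpers. */
    static int decode_example_op(struct xdr_stream *xdr, nfs4_stateid *stateid)
    {
            int status;

            status = decode_op_hdr(xdr, OP_EXAMPLE);        /* opnum + status */
            if (!status)
                    status = decode_stateid(xdr, stateid);  /* 16-byte opaque */
            return status;
    }
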
@@ -3378,10 +3528,16 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
3378 return status; 3528 return status;
3379 if ((status = decode_change_info(xdr, cinfo))) 3529 if ((status = decode_change_info(xdr, cinfo)))
3380 return status; 3530 return status;
3381 READ_BUF(4); 3531 p = xdr_inline_decode(xdr, 4);
3382 READ32(bmlen); 3532 if (unlikely(!p))
3383 READ_BUF(bmlen << 2); 3533 goto out_overflow;
3384 return 0; 3534 bmlen = be32_to_cpup(p);
3535 p = xdr_inline_decode(xdr, bmlen << 2);
3536 if (likely(p))
3537 return 0;
3538out_overflow:
3539 print_overflow_msg(__func__, xdr);
3540 return -EIO;
3385} 3541}
3386 3542
3387static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) 3543static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
@@ -3466,7 +3622,8 @@ xdr_error:
3466 return status; 3622 return status;
3467} 3623}
3468 3624
3469static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, const struct nfs_server *server) 3625static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3626 const struct nfs_server *server, int may_sleep)
3470{ 3627{
3471 __be32 *savep; 3628 __be32 *savep;
3472 uint32_t attrlen, 3629 uint32_t attrlen,
@@ -3538,12 +3695,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
3538 goto xdr_error; 3695 goto xdr_error;
3539 fattr->valid |= status; 3696 fattr->valid |= status;
3540 3697
3541 status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid); 3698 status = decode_attr_owner(xdr, bitmap, server->nfs_client,
3699 &fattr->uid, may_sleep);
3542 if (status < 0) 3700 if (status < 0)
3543 goto xdr_error; 3701 goto xdr_error;
3544 fattr->valid |= status; 3702 fattr->valid |= status;
3545 3703
3546 status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid); 3704 status = decode_attr_group(xdr, bitmap, server->nfs_client,
3705 &fattr->gid, may_sleep);
3547 if (status < 0) 3706 if (status < 0)
3548 goto xdr_error; 3707 goto xdr_error;
3549 fattr->valid |= status; 3708 fattr->valid |= status;
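
The new may_sleep argument records whether the decode may block: mapping the owner and owner_group strings to a uid/gid goes through the idmapper upcall, which can sleep, so asynchronous RPC tasks have to skip it. A rough sketch of the caller side, assuming the surrounding nfs4xdr.c context; nfs4_xdr_dec_example is hypothetical, and the real decoders first build the xdr_stream from the raw reply:

    /* Only a synchronous task is allowed to wait on the idmapper. */
    static int nfs4_xdr_dec_example(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
                                    struct nfs4_getattr_res *res)
    {
            int may_sleep = !RPC_IS_ASYNC(rqstp->rq_task);

            return decode_getfattr(xdr, res->fattr, res->server, may_sleep);
    }
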
@@ -3633,14 +3792,21 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
3633 if (status) 3792 if (status)
3634 return status; 3793 return status;
3635 3794
3636 READ_BUF(4); 3795 p = xdr_inline_decode(xdr, 4);
3637 READ32(len); 3796 if (unlikely(!p))
3797 goto out_overflow;
3798 len = be32_to_cpup(p);
3638 if (len > NFS4_FHSIZE) 3799 if (len > NFS4_FHSIZE)
3639 return -EIO; 3800 return -EIO;
3640 fh->size = len; 3801 fh->size = len;
3641 READ_BUF(len); 3802 p = xdr_inline_decode(xdr, len);
3642 COPYMEM(fh->data, len); 3803 if (unlikely(!p))
3804 goto out_overflow;
3805 memcpy(fh->data, p, len);
3643 return 0; 3806 return 0;
3807out_overflow:
3808 print_overflow_msg(__func__, xdr);
3809 return -EIO;
3644} 3810}
3645 3811
3646static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 3812static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3662,10 +3828,12 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3662 __be32 *p; 3828 __be32 *p;
3663 uint32_t namelen, type; 3829 uint32_t namelen, type;
3664 3830
3665 READ_BUF(32); 3831 p = xdr_inline_decode(xdr, 32);
3666 READ64(offset); 3832 if (unlikely(!p))
3667 READ64(length); 3833 goto out_overflow;
3668 READ32(type); 3834 p = xdr_decode_hyper(p, &offset);
3835 p = xdr_decode_hyper(p, &length);
3836 type = be32_to_cpup(p++);
3669 if (fl != NULL) { 3837 if (fl != NULL) {
3670 fl->fl_start = (loff_t)offset; 3838 fl->fl_start = (loff_t)offset;
3671 fl->fl_end = fl->fl_start + (loff_t)length - 1; 3839 fl->fl_end = fl->fl_start + (loff_t)length - 1;
@@ -3676,23 +3844,27 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3676 fl->fl_type = F_RDLCK; 3844 fl->fl_type = F_RDLCK;
3677 fl->fl_pid = 0; 3845 fl->fl_pid = 0;
3678 } 3846 }
3679 READ64(clientid); 3847 p = xdr_decode_hyper(p, &clientid);
3680 READ32(namelen); 3848 namelen = be32_to_cpup(p);
3681 READ_BUF(namelen); 3849 p = xdr_inline_decode(xdr, namelen);
3682 return -NFS4ERR_DENIED; 3850 if (likely(p))
3851 return -NFS4ERR_DENIED;
3852out_overflow:
3853 print_overflow_msg(__func__, xdr);
3854 return -EIO;
3683} 3855}
3684 3856
3685static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) 3857static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
3686{ 3858{
3687 __be32 *p;
3688 int status; 3859 int status;
3689 3860
3690 status = decode_op_hdr(xdr, OP_LOCK); 3861 status = decode_op_hdr(xdr, OP_LOCK);
3691 if (status == -EIO) 3862 if (status == -EIO)
3692 goto out; 3863 goto out;
3693 if (status == 0) { 3864 if (status == 0) {
3694 READ_BUF(NFS4_STATEID_SIZE); 3865 status = decode_stateid(xdr, &res->stateid);
3695 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3866 if (unlikely(status))
3867 goto out;
3696 } else if (status == -NFS4ERR_DENIED) 3868 } else if (status == -NFS4ERR_DENIED)
3697 status = decode_lock_denied(xdr, NULL); 3869 status = decode_lock_denied(xdr, NULL);
3698 if (res->open_seqid != NULL) 3870 if (res->open_seqid != NULL)
@@ -3713,16 +3885,13 @@ static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res)
3713 3885
3714static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) 3886static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
3715{ 3887{
3716 __be32 *p;
3717 int status; 3888 int status;
3718 3889
3719 status = decode_op_hdr(xdr, OP_LOCKU); 3890 status = decode_op_hdr(xdr, OP_LOCKU);
3720 if (status != -EIO) 3891 if (status != -EIO)
3721 nfs_increment_lock_seqid(status, res->seqid); 3892 nfs_increment_lock_seqid(status, res->seqid);
3722 if (status == 0) { 3893 if (status == 0)
3723 READ_BUF(NFS4_STATEID_SIZE); 3894 status = decode_stateid(xdr, &res->stateid);
3724 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3725 }
3726 return status; 3895 return status;
3727} 3896}
3728 3897
@@ -3737,34 +3906,46 @@ static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
3737 __be32 *p; 3906 __be32 *p;
3738 uint32_t limit_type, nblocks, blocksize; 3907 uint32_t limit_type, nblocks, blocksize;
3739 3908
3740 READ_BUF(12); 3909 p = xdr_inline_decode(xdr, 12);
3741 READ32(limit_type); 3910 if (unlikely(!p))
3911 goto out_overflow;
3912 limit_type = be32_to_cpup(p++);
3742 switch (limit_type) { 3913 switch (limit_type) {
3743 case 1: 3914 case 1:
3744 READ64(*maxsize); 3915 xdr_decode_hyper(p, maxsize);
3745 break; 3916 break;
3746 case 2: 3917 case 2:
3747 READ32(nblocks); 3918 nblocks = be32_to_cpup(p++);
3748 READ32(blocksize); 3919 blocksize = be32_to_cpup(p);
3749 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; 3920 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
3750 } 3921 }
3751 return 0; 3922 return 0;
3923out_overflow:
3924 print_overflow_msg(__func__, xdr);
3925 return -EIO;
3752} 3926}
3753 3927
3754static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) 3928static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3755{ 3929{
3756 __be32 *p; 3930 __be32 *p;
3757 uint32_t delegation_type; 3931 uint32_t delegation_type;
3932 int status;
3758 3933
3759 READ_BUF(4); 3934 p = xdr_inline_decode(xdr, 4);
3760 READ32(delegation_type); 3935 if (unlikely(!p))
3936 goto out_overflow;
3937 delegation_type = be32_to_cpup(p);
3761 if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { 3938 if (delegation_type == NFS4_OPEN_DELEGATE_NONE) {
3762 res->delegation_type = 0; 3939 res->delegation_type = 0;
3763 return 0; 3940 return 0;
3764 } 3941 }
3765 READ_BUF(NFS4_STATEID_SIZE+4); 3942 status = decode_stateid(xdr, &res->delegation);
3766 COPYMEM(res->delegation.data, NFS4_STATEID_SIZE); 3943 if (unlikely(status))
3767 READ32(res->do_recall); 3944 return status;
3945 p = xdr_inline_decode(xdr, 4);
3946 if (unlikely(!p))
3947 goto out_overflow;
3948 res->do_recall = be32_to_cpup(p);
3768 3949
3769 switch (delegation_type) { 3950 switch (delegation_type) {
3770 case NFS4_OPEN_DELEGATE_READ: 3951 case NFS4_OPEN_DELEGATE_READ:
@@ -3776,6 +3957,9 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3776 return -EIO; 3957 return -EIO;
3777 } 3958 }
3778 return decode_ace(xdr, NULL, res->server->nfs_client); 3959 return decode_ace(xdr, NULL, res->server->nfs_client);
3960out_overflow:
3961 print_overflow_msg(__func__, xdr);
3962 return -EIO;
3779} 3963}
3780 3964
3781static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3965static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3787,23 +3971,27 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3787 status = decode_op_hdr(xdr, OP_OPEN); 3971 status = decode_op_hdr(xdr, OP_OPEN);
3788 if (status != -EIO) 3972 if (status != -EIO)
3789 nfs_increment_open_seqid(status, res->seqid); 3973 nfs_increment_open_seqid(status, res->seqid);
3790 if (status) 3974 if (!status)
3975 status = decode_stateid(xdr, &res->stateid);
3976 if (unlikely(status))
3791 return status; 3977 return status;
3792 READ_BUF(NFS4_STATEID_SIZE);
3793 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3794 3978
3795 decode_change_info(xdr, &res->cinfo); 3979 decode_change_info(xdr, &res->cinfo);
3796 3980
3797 READ_BUF(8); 3981 p = xdr_inline_decode(xdr, 8);
3798 READ32(res->rflags); 3982 if (unlikely(!p))
3799 READ32(bmlen); 3983 goto out_overflow;
3984 res->rflags = be32_to_cpup(p++);
3985 bmlen = be32_to_cpup(p);
3800 if (bmlen > 10) 3986 if (bmlen > 10)
3801 goto xdr_error; 3987 goto xdr_error;
3802 3988
3803 READ_BUF(bmlen << 2); 3989 p = xdr_inline_decode(xdr, bmlen << 2);
3990 if (unlikely(!p))
3991 goto out_overflow;
3804 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); 3992 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
3805 for (i = 0; i < savewords; ++i) 3993 for (i = 0; i < savewords; ++i)
3806 READ32(res->attrset[i]); 3994 res->attrset[i] = be32_to_cpup(p++);
3807 for (; i < NFS4_BITMAP_SIZE; i++) 3995 for (; i < NFS4_BITMAP_SIZE; i++)
3808 res->attrset[i] = 0; 3996 res->attrset[i] = 0;
3809 3997
@@ -3811,36 +3999,33 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3811xdr_error: 3999xdr_error:
3812 dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); 4000 dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
3813 return -EIO; 4001 return -EIO;
4002out_overflow:
4003 print_overflow_msg(__func__, xdr);
4004 return -EIO;
3814} 4005}
3815 4006
3816static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) 4007static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
3817{ 4008{
3818 __be32 *p;
3819 int status; 4009 int status;
3820 4010
3821 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); 4011 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
3822 if (status != -EIO) 4012 if (status != -EIO)
3823 nfs_increment_open_seqid(status, res->seqid); 4013 nfs_increment_open_seqid(status, res->seqid);
3824 if (status) 4014 if (!status)
3825 return status; 4015 status = decode_stateid(xdr, &res->stateid);
3826 READ_BUF(NFS4_STATEID_SIZE); 4016 return status;
3827 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3828 return 0;
3829} 4017}
3830 4018
3831static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) 4019static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
3832{ 4020{
3833 __be32 *p;
3834 int status; 4021 int status;
3835 4022
3836 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); 4023 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
3837 if (status != -EIO) 4024 if (status != -EIO)
3838 nfs_increment_open_seqid(status, res->seqid); 4025 nfs_increment_open_seqid(status, res->seqid);
3839 if (status) 4026 if (!status)
3840 return status; 4027 status = decode_stateid(xdr, &res->stateid);
3841 READ_BUF(NFS4_STATEID_SIZE); 4028 return status;
3842 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3843 return 0;
3844} 4029}
3845 4030
3846static int decode_putfh(struct xdr_stream *xdr) 4031static int decode_putfh(struct xdr_stream *xdr)
@@ -3863,9 +4048,11 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
3863 status = decode_op_hdr(xdr, OP_READ); 4048 status = decode_op_hdr(xdr, OP_READ);
3864 if (status) 4049 if (status)
3865 return status; 4050 return status;
3866 READ_BUF(8); 4051 p = xdr_inline_decode(xdr, 8);
3867 READ32(eof); 4052 if (unlikely(!p))
3868 READ32(count); 4053 goto out_overflow;
4054 eof = be32_to_cpup(p++);
4055 count = be32_to_cpup(p);
3869 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 4056 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
3870 recvd = req->rq_rcv_buf.len - hdrlen; 4057 recvd = req->rq_rcv_buf.len - hdrlen;
3871 if (count > recvd) { 4058 if (count > recvd) {
@@ -3878,6 +4065,9 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
3878 res->eof = eof; 4065 res->eof = eof;
3879 res->count = count; 4066 res->count = count;
3880 return 0; 4067 return 0;
4068out_overflow:
4069 print_overflow_msg(__func__, xdr);
4070 return -EIO;
3881} 4071}
3882 4072
3883static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) 4073static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
@@ -3892,17 +4082,17 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3892 int status; 4082 int status;
3893 4083
3894 status = decode_op_hdr(xdr, OP_READDIR); 4084 status = decode_op_hdr(xdr, OP_READDIR);
3895 if (status) 4085 if (!status)
4086 status = decode_verifier(xdr, readdir->verifier.data);
4087 if (unlikely(status))
3896 return status; 4088 return status;
3897 READ_BUF(8);
3898 COPYMEM(readdir->verifier.data, 8);
3899 dprintk("%s: verifier = %08x:%08x\n", 4089 dprintk("%s: verifier = %08x:%08x\n",
3900 __func__, 4090 __func__,
3901 ((u32 *)readdir->verifier.data)[0], 4091 ((u32 *)readdir->verifier.data)[0],
3902 ((u32 *)readdir->verifier.data)[1]); 4092 ((u32 *)readdir->verifier.data)[1]);
3903 4093
3904 4094
3905 hdrlen = (char *) p - (char *) iov->iov_base; 4095 hdrlen = (char *) xdr->p - (char *) iov->iov_base;
3906 recvd = rcvbuf->len - hdrlen; 4096 recvd = rcvbuf->len - hdrlen;
3907 if (pglen > recvd) 4097 if (pglen > recvd)
3908 pglen = recvd; 4098 pglen = recvd;
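
With the verifier read through decode_verifier(), the local p no longer sits just past the decoded header, so the header length is taken from the stream's own cursor instead. Roughly, with iov pointing at the reply's head kvec as in this function:

    hdrlen = (char *)xdr->p - (char *)iov->iov_base;  /* bytes consumed so far */
    recvd  = rcvbuf->len - hdrlen;                    /* left over for page data */
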
@@ -3990,8 +4180,10 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
3990 return status; 4180 return status;
3991 4181
3992 /* Convert length of symlink */ 4182 /* Convert length of symlink */
3993 READ_BUF(4); 4183 p = xdr_inline_decode(xdr, 4);
3994 READ32(len); 4184 if (unlikely(!p))
4185 goto out_overflow;
4186 len = be32_to_cpup(p);
3995 if (len >= rcvbuf->page_len || len <= 0) { 4187 if (len >= rcvbuf->page_len || len <= 0) {
3996 dprintk("nfs: server returned giant symlink!\n"); 4188 dprintk("nfs: server returned giant symlink!\n");
3997 return -ENAMETOOLONG; 4189 return -ENAMETOOLONG;
@@ -4015,6 +4207,9 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4015 kaddr[len+rcvbuf->page_base] = '\0'; 4207 kaddr[len+rcvbuf->page_base] = '\0';
4016 kunmap_atomic(kaddr, KM_USER0); 4208 kunmap_atomic(kaddr, KM_USER0);
4017 return 0; 4209 return 0;
4210out_overflow:
4211 print_overflow_msg(__func__, xdr);
4212 return -EIO;
4018} 4213}
4019 4214
4020static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 4215static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -4112,10 +4307,16 @@ static int decode_setattr(struct xdr_stream *xdr)
4112 status = decode_op_hdr(xdr, OP_SETATTR); 4307 status = decode_op_hdr(xdr, OP_SETATTR);
4113 if (status) 4308 if (status)
4114 return status; 4309 return status;
4115 READ_BUF(4); 4310 p = xdr_inline_decode(xdr, 4);
4116 READ32(bmlen); 4311 if (unlikely(!p))
4117 READ_BUF(bmlen << 2); 4312 goto out_overflow;
4118 return 0; 4313 bmlen = be32_to_cpup(p);
4314 p = xdr_inline_decode(xdr, bmlen << 2);
4315 if (likely(p))
4316 return 0;
4317out_overflow:
4318 print_overflow_msg(__func__, xdr);
4319 return -EIO;
4119} 4320}
4120 4321
4121static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) 4322static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
@@ -4124,35 +4325,50 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
4124 uint32_t opnum; 4325 uint32_t opnum;
4125 int32_t nfserr; 4326 int32_t nfserr;
4126 4327
4127 READ_BUF(8); 4328 p = xdr_inline_decode(xdr, 8);
4128 READ32(opnum); 4329 if (unlikely(!p))
4330 goto out_overflow;
4331 opnum = be32_to_cpup(p++);
4129 if (opnum != OP_SETCLIENTID) { 4332 if (opnum != OP_SETCLIENTID) {
4130 dprintk("nfs: decode_setclientid: Server returned operation" 4333 dprintk("nfs: decode_setclientid: Server returned operation"
4131 " %d\n", opnum); 4334 " %d\n", opnum);
4132 return -EIO; 4335 return -EIO;
4133 } 4336 }
4134 READ32(nfserr); 4337 nfserr = be32_to_cpup(p);
4135 if (nfserr == NFS_OK) { 4338 if (nfserr == NFS_OK) {
4136 READ_BUF(8 + NFS4_VERIFIER_SIZE); 4339 p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
4137 READ64(clp->cl_clientid); 4340 if (unlikely(!p))
4138 COPYMEM(clp->cl_confirm.data, NFS4_VERIFIER_SIZE); 4341 goto out_overflow;
4342 p = xdr_decode_hyper(p, &clp->cl_clientid);
4343 memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE);
4139 } else if (nfserr == NFSERR_CLID_INUSE) { 4344 } else if (nfserr == NFSERR_CLID_INUSE) {
4140 uint32_t len; 4345 uint32_t len;
4141 4346
4142 /* skip netid string */ 4347 /* skip netid string */
4143 READ_BUF(4); 4348 p = xdr_inline_decode(xdr, 4);
4144 READ32(len); 4349 if (unlikely(!p))
4145 READ_BUF(len); 4350 goto out_overflow;
4351 len = be32_to_cpup(p);
4352 p = xdr_inline_decode(xdr, len);
4353 if (unlikely(!p))
4354 goto out_overflow;
4146 4355
4147 /* skip uaddr string */ 4356 /* skip uaddr string */
4148 READ_BUF(4); 4357 p = xdr_inline_decode(xdr, 4);
4149 READ32(len); 4358 if (unlikely(!p))
4150 READ_BUF(len); 4359 goto out_overflow;
4360 len = be32_to_cpup(p);
4361 p = xdr_inline_decode(xdr, len);
4362 if (unlikely(!p))
4363 goto out_overflow;
4151 return -NFSERR_CLID_INUSE; 4364 return -NFSERR_CLID_INUSE;
4152 } else 4365 } else
4153 return nfs4_stat_to_errno(nfserr); 4366 return nfs4_stat_to_errno(nfserr);
4154 4367
4155 return 0; 4368 return 0;
4369out_overflow:
4370 print_overflow_msg(__func__, xdr);
4371 return -EIO;
4156} 4372}
4157 4373
4158static int decode_setclientid_confirm(struct xdr_stream *xdr) 4374static int decode_setclientid_confirm(struct xdr_stream *xdr)
@@ -4169,11 +4385,16 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
4169 if (status) 4385 if (status)
4170 return status; 4386 return status;
4171 4387
4172 READ_BUF(16); 4388 p = xdr_inline_decode(xdr, 16);
4173 READ32(res->count); 4389 if (unlikely(!p))
4174 READ32(res->verf->committed); 4390 goto out_overflow;
4175 COPYMEM(res->verf->verifier, 8); 4391 res->count = be32_to_cpup(p++);
4392 res->verf->committed = be32_to_cpup(p++);
4393 memcpy(res->verf->verifier, p, 8);
4176 return 0; 4394 return 0;
4395out_overflow:
4396 print_overflow_msg(__func__, xdr);
4397 return -EIO;
4177} 4398}
4178 4399
4179static int decode_delegreturn(struct xdr_stream *xdr) 4400static int decode_delegreturn(struct xdr_stream *xdr)
@@ -4187,6 +4408,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4187{ 4408{
4188 __be32 *p; 4409 __be32 *p;
4189 uint32_t dummy; 4410 uint32_t dummy;
4411 char *dummy_str;
4190 int status; 4412 int status;
4191 struct nfs_client *clp = res->client; 4413 struct nfs_client *clp = res->client;
4192 4414
@@ -4194,36 +4416,45 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4194 if (status) 4416 if (status)
4195 return status; 4417 return status;
4196 4418
4197 READ_BUF(8); 4419 p = xdr_inline_decode(xdr, 8);
4198 READ64(clp->cl_ex_clid); 4420 if (unlikely(!p))
4199 READ_BUF(12); 4421 goto out_overflow;
4200 READ32(clp->cl_seqid); 4422 xdr_decode_hyper(p, &clp->cl_ex_clid);
4201 READ32(clp->cl_exchange_flags); 4423 p = xdr_inline_decode(xdr, 12);
4424 if (unlikely(!p))
4425 goto out_overflow;
4426 clp->cl_seqid = be32_to_cpup(p++);
4427 clp->cl_exchange_flags = be32_to_cpup(p++);
4202 4428
4203 /* We ask for SP4_NONE */ 4429 /* We ask for SP4_NONE */
4204 READ32(dummy); 4430 dummy = be32_to_cpup(p);
4205 if (dummy != SP4_NONE) 4431 if (dummy != SP4_NONE)
4206 return -EIO; 4432 return -EIO;
4207 4433
4208 /* Throw away minor_id */ 4434 /* Throw away minor_id */
4209 READ_BUF(8); 4435 p = xdr_inline_decode(xdr, 8);
4436 if (unlikely(!p))
4437 goto out_overflow;
4210 4438
4211 /* Throw away Major id */ 4439 /* Throw away Major id */
4212 READ_BUF(4); 4440 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4213 READ32(dummy); 4441 if (unlikely(status))
4214 READ_BUF(dummy); 4442 return status;
4215 4443
4216 /* Throw away server_scope */ 4444 /* Throw away server_scope */
4217 READ_BUF(4); 4445 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4218 READ32(dummy); 4446 if (unlikely(status))
4219 READ_BUF(dummy); 4447 return status;
4220 4448
4221 /* Throw away Implementation id array */ 4449 /* Throw away Implementation id array */
4222 READ_BUF(4); 4450 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4223 READ32(dummy); 4451 if (unlikely(status))
4224 READ_BUF(dummy); 4452 return status;
4225 4453
4226 return 0; 4454 return 0;
4455out_overflow:
4456 print_overflow_msg(__func__, xdr);
4457 return -EIO;
4227} 4458}
4228 4459
4229static int decode_chan_attrs(struct xdr_stream *xdr, 4460static int decode_chan_attrs(struct xdr_stream *xdr,
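
decode_exchange_id() now discards the server major id, server scope and implementation-id strings through decode_opaque_inline(), which reads a 32-bit length and then that many opaque bytes in one call. Its exact prototype is not shown in this hunk, but from the call sites it takes the stream, a length out-pointer and a data out-pointer; skip_opaque below is a hypothetical wrapper illustrating the usage:

    /* Skip one XDR opaque<>: 4-byte length followed by the data itself. */
    static int skip_opaque(struct xdr_stream *xdr)
    {
            uint32_t len;
            char *data;

            return decode_opaque_inline(xdr, &len, &data);  /* contents ignored */
    }
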
@@ -4232,22 +4463,35 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
4232 __be32 *p; 4463 __be32 *p;
4233 u32 nr_attrs; 4464 u32 nr_attrs;
4234 4465
4235 READ_BUF(28); 4466 p = xdr_inline_decode(xdr, 28);
4236 READ32(attrs->headerpadsz); 4467 if (unlikely(!p))
4237 READ32(attrs->max_rqst_sz); 4468 goto out_overflow;
4238 READ32(attrs->max_resp_sz); 4469 attrs->headerpadsz = be32_to_cpup(p++);
4239 READ32(attrs->max_resp_sz_cached); 4470 attrs->max_rqst_sz = be32_to_cpup(p++);
4240 READ32(attrs->max_ops); 4471 attrs->max_resp_sz = be32_to_cpup(p++);
4241 READ32(attrs->max_reqs); 4472 attrs->max_resp_sz_cached = be32_to_cpup(p++);
4242 READ32(nr_attrs); 4473 attrs->max_ops = be32_to_cpup(p++);
4474 attrs->max_reqs = be32_to_cpup(p++);
4475 nr_attrs = be32_to_cpup(p);
4243 if (unlikely(nr_attrs > 1)) { 4476 if (unlikely(nr_attrs > 1)) {
4244 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", 4477 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
4245 __func__, nr_attrs); 4478 __func__, nr_attrs);
4246 return -EINVAL; 4479 return -EINVAL;
4247 } 4480 }
4248 if (nr_attrs == 1) 4481 if (nr_attrs == 1) {
4249 READ_BUF(4); /* skip rdma_attrs */ 4482 p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */
4483 if (unlikely(!p))
4484 goto out_overflow;
4485 }
4250 return 0; 4486 return 0;
4487out_overflow:
4488 print_overflow_msg(__func__, xdr);
4489 return -EIO;
4490}
4491
4492static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
4493{
4494 return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
4251} 4495}
4252 4496
4253static int decode_create_session(struct xdr_stream *xdr, 4497static int decode_create_session(struct xdr_stream *xdr,
@@ -4259,24 +4503,26 @@ static int decode_create_session(struct xdr_stream *xdr,
4259 struct nfs4_session *session = clp->cl_session; 4503 struct nfs4_session *session = clp->cl_session;
4260 4504
4261 status = decode_op_hdr(xdr, OP_CREATE_SESSION); 4505 status = decode_op_hdr(xdr, OP_CREATE_SESSION);
4262 4506 if (!status)
4263 if (status) 4507 status = decode_sessionid(xdr, &session->sess_id);
4508 if (unlikely(status))
4264 return status; 4509 return status;
4265 4510
4266 /* sessionid */
4267 READ_BUF(NFS4_MAX_SESSIONID_LEN);
4268 COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN);
4269
4270 /* seqid, flags */ 4511 /* seqid, flags */
4271 READ_BUF(8); 4512 p = xdr_inline_decode(xdr, 8);
4272 READ32(clp->cl_seqid); 4513 if (unlikely(!p))
4273 READ32(session->flags); 4514 goto out_overflow;
4515 clp->cl_seqid = be32_to_cpup(p++);
4516 session->flags = be32_to_cpup(p);
4274 4517
4275 /* Channel attributes */ 4518 /* Channel attributes */
4276 status = decode_chan_attrs(xdr, &session->fc_attrs); 4519 status = decode_chan_attrs(xdr, &session->fc_attrs);
4277 if (!status) 4520 if (!status)
4278 status = decode_chan_attrs(xdr, &session->bc_attrs); 4521 status = decode_chan_attrs(xdr, &session->bc_attrs);
4279 return status; 4522 return status;
4523out_overflow:
4524 print_overflow_msg(__func__, xdr);
4525 return -EIO;
4280} 4526}
4281 4527
4282static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) 4528static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
@@ -4300,7 +4546,9 @@ static int decode_sequence(struct xdr_stream *xdr,
4300 return 0; 4546 return 0;
4301 4547
4302 status = decode_op_hdr(xdr, OP_SEQUENCE); 4548 status = decode_op_hdr(xdr, OP_SEQUENCE);
4303 if (status) 4549 if (!status)
4550 status = decode_sessionid(xdr, &id);
4551 if (unlikely(status))
4304 goto out_err; 4552 goto out_err;
4305 4553
4306 /* 4554 /*
@@ -4309,36 +4557,43 @@ static int decode_sequence(struct xdr_stream *xdr,
4309 */ 4557 */
4310 status = -ESERVERFAULT; 4558 status = -ESERVERFAULT;
4311 4559
4312 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4313 READ_BUF(NFS4_MAX_SESSIONID_LEN + 20);
4314 COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN);
4315 if (memcmp(id.data, res->sr_session->sess_id.data, 4560 if (memcmp(id.data, res->sr_session->sess_id.data,
4316 NFS4_MAX_SESSIONID_LEN)) { 4561 NFS4_MAX_SESSIONID_LEN)) {
4317 dprintk("%s Invalid session id\n", __func__); 4562 dprintk("%s Invalid session id\n", __func__);
4318 goto out_err; 4563 goto out_err;
4319 } 4564 }
4565
4566 p = xdr_inline_decode(xdr, 20);
4567 if (unlikely(!p))
4568 goto out_overflow;
4569
4320 /* seqid */ 4570 /* seqid */
4321 READ32(dummy); 4571 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4572 dummy = be32_to_cpup(p++);
4322 if (dummy != slot->seq_nr) { 4573 if (dummy != slot->seq_nr) {
4323 dprintk("%s Invalid sequence number\n", __func__); 4574 dprintk("%s Invalid sequence number\n", __func__);
4324 goto out_err; 4575 goto out_err;
4325 } 4576 }
4326 /* slot id */ 4577 /* slot id */
4327 READ32(dummy); 4578 dummy = be32_to_cpup(p++);
4328 if (dummy != res->sr_slotid) { 4579 if (dummy != res->sr_slotid) {
4329 dprintk("%s Invalid slot id\n", __func__); 4580 dprintk("%s Invalid slot id\n", __func__);
4330 goto out_err; 4581 goto out_err;
4331 } 4582 }
4332 /* highest slot id - currently not processed */ 4583 /* highest slot id - currently not processed */
4333 READ32(dummy); 4584 dummy = be32_to_cpup(p++);
4334 /* target highest slot id - currently not processed */ 4585 /* target highest slot id - currently not processed */
4335 READ32(dummy); 4586 dummy = be32_to_cpup(p++);
4336 /* result flags - currently not processed */ 4587 /* result flags - currently not processed */
4337 READ32(dummy); 4588 dummy = be32_to_cpup(p);
4338 status = 0; 4589 status = 0;
4339out_err: 4590out_err:
4340 res->sr_status = status; 4591 res->sr_status = status;
4341 return status; 4592 return status;
4593out_overflow:
4594 print_overflow_msg(__func__, xdr);
4595 status = -EIO;
4596 goto out_err;
4342#else /* CONFIG_NFS_V4_1 */ 4597#else /* CONFIG_NFS_V4_1 */
4343 return 0; 4598 return 0;
4344#endif /* CONFIG_NFS_V4_1 */ 4599#endif /* CONFIG_NFS_V4_1 */
@@ -4370,7 +4625,8 @@ static int nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, __be32 *p, struct
4370 status = decode_open_downgrade(&xdr, res); 4625 status = decode_open_downgrade(&xdr, res);
4371 if (status != 0) 4626 if (status != 0)
4372 goto out; 4627 goto out;
4373 decode_getfattr(&xdr, res->fattr, res->server); 4628 decode_getfattr(&xdr, res->fattr, res->server,
4629 !RPC_IS_ASYNC(rqstp->rq_task));
4374out: 4630out:
4375 return status; 4631 return status;
4376} 4632}
@@ -4397,7 +4653,8 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac
4397 status = decode_access(&xdr, res); 4653 status = decode_access(&xdr, res);
4398 if (status != 0) 4654 if (status != 0)
4399 goto out; 4655 goto out;
4400 decode_getfattr(&xdr, res->fattr, res->server); 4656 decode_getfattr(&xdr, res->fattr, res->server,
4657 !RPC_IS_ASYNC(rqstp->rq_task));
4401out: 4658out:
4402 return status; 4659 return status;
4403} 4660}
@@ -4424,7 +4681,8 @@ static int nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_lo
4424 goto out; 4681 goto out;
4425 if ((status = decode_getfh(&xdr, res->fh)) != 0) 4682 if ((status = decode_getfh(&xdr, res->fh)) != 0)
4426 goto out; 4683 goto out;
4427 status = decode_getfattr(&xdr, res->fattr, res->server); 4684 status = decode_getfattr(&xdr, res->fattr, res->server,
 4685 !RPC_IS_ASYNC(rqstp->rq_task));
4428out: 4686out:
4429 return status; 4687 return status;
4430} 4688}
@@ -4448,7 +4706,8 @@ static int nfs4_xdr_dec_lookup_root(struct rpc_rqst *rqstp, __be32 *p, struct nf
4448 if ((status = decode_putrootfh(&xdr)) != 0) 4706 if ((status = decode_putrootfh(&xdr)) != 0)
4449 goto out; 4707 goto out;
4450 if ((status = decode_getfh(&xdr, res->fh)) == 0) 4708 if ((status = decode_getfh(&xdr, res->fh)) == 0)
4451 status = decode_getfattr(&xdr, res->fattr, res->server); 4709 status = decode_getfattr(&xdr, res->fattr, res->server,
4710 !RPC_IS_ASYNC(rqstp->rq_task));
4452out: 4711out:
4453 return status; 4712 return status;
4454} 4713}
@@ -4473,7 +4732,8 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
4473 goto out; 4732 goto out;
4474 if ((status = decode_remove(&xdr, &res->cinfo)) != 0) 4733 if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
4475 goto out; 4734 goto out;
4476 decode_getfattr(&xdr, &res->dir_attr, res->server); 4735 decode_getfattr(&xdr, &res->dir_attr, res->server,
4736 !RPC_IS_ASYNC(rqstp->rq_task));
4477out: 4737out:
4478 return status; 4738 return status;
4479} 4739}
@@ -4503,11 +4763,13 @@ static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_re
4503 if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0) 4763 if ((status = decode_rename(&xdr, &res->old_cinfo, &res->new_cinfo)) != 0)
4504 goto out; 4764 goto out;
4505 /* Current FH is target directory */ 4765 /* Current FH is target directory */
4506 if (decode_getfattr(&xdr, res->new_fattr, res->server) != 0) 4766 if (decode_getfattr(&xdr, res->new_fattr, res->server,
4767 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4507 goto out; 4768 goto out;
4508 if ((status = decode_restorefh(&xdr)) != 0) 4769 if ((status = decode_restorefh(&xdr)) != 0)
4509 goto out; 4770 goto out;
4510 decode_getfattr(&xdr, res->old_fattr, res->server); 4771 decode_getfattr(&xdr, res->old_fattr, res->server,
4772 !RPC_IS_ASYNC(rqstp->rq_task));
4511out: 4773out:
4512 return status; 4774 return status;
4513} 4775}
@@ -4540,11 +4802,13 @@ static int nfs4_xdr_dec_link(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_link
4540 * Note order: OP_LINK leaves the directory as the current 4802 * Note order: OP_LINK leaves the directory as the current
4541 * filehandle. 4803 * filehandle.
4542 */ 4804 */
4543 if (decode_getfattr(&xdr, res->dir_attr, res->server) != 0) 4805 if (decode_getfattr(&xdr, res->dir_attr, res->server,
4806 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4544 goto out; 4807 goto out;
4545 if ((status = decode_restorefh(&xdr)) != 0) 4808 if ((status = decode_restorefh(&xdr)) != 0)
4546 goto out; 4809 goto out;
4547 decode_getfattr(&xdr, res->fattr, res->server); 4810 decode_getfattr(&xdr, res->fattr, res->server,
4811 !RPC_IS_ASYNC(rqstp->rq_task));
4548out: 4812out:
4549 return status; 4813 return status;
4550} 4814}
@@ -4573,11 +4837,13 @@ static int nfs4_xdr_dec_create(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_cr
4573 goto out; 4837 goto out;
4574 if ((status = decode_getfh(&xdr, res->fh)) != 0) 4838 if ((status = decode_getfh(&xdr, res->fh)) != 0)
4575 goto out; 4839 goto out;
4576 if (decode_getfattr(&xdr, res->fattr, res->server) != 0) 4840 if (decode_getfattr(&xdr, res->fattr, res->server,
4841 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4577 goto out; 4842 goto out;
4578 if ((status = decode_restorefh(&xdr)) != 0) 4843 if ((status = decode_restorefh(&xdr)) != 0)
4579 goto out; 4844 goto out;
4580 decode_getfattr(&xdr, res->dir_fattr, res->server); 4845 decode_getfattr(&xdr, res->dir_fattr, res->server,
4846 !RPC_IS_ASYNC(rqstp->rq_task));
4581out: 4847out:
4582 return status; 4848 return status;
4583} 4849}
@@ -4609,7 +4875,8 @@ static int nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_g
4609 status = decode_putfh(&xdr); 4875 status = decode_putfh(&xdr);
4610 if (status) 4876 if (status)
4611 goto out; 4877 goto out;
4612 status = decode_getfattr(&xdr, res->fattr, res->server); 4878 status = decode_getfattr(&xdr, res->fattr, res->server,
4879 !RPC_IS_ASYNC(rqstp->rq_task));
4613out: 4880out:
4614 return status; 4881 return status;
4615} 4882}
@@ -4716,7 +4983,8 @@ static int nfs4_xdr_dec_close(struct rpc_rqst *rqstp, __be32 *p, struct nfs_clos
4716 * an ESTALE error. Shouldn't be a problem, 4983 * an ESTALE error. Shouldn't be a problem,
4717 * though, since fattr->valid will remain unset. 4984 * though, since fattr->valid will remain unset.
4718 */ 4985 */
4719 decode_getfattr(&xdr, res->fattr, res->server); 4986 decode_getfattr(&xdr, res->fattr, res->server,
4987 !RPC_IS_ASYNC(rqstp->rq_task));
4720out: 4988out:
4721 return status; 4989 return status;
4722} 4990}
@@ -4748,11 +5016,13 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, __be32 *p, struct nfs_openr
4748 goto out; 5016 goto out;
4749 if (decode_getfh(&xdr, &res->fh) != 0) 5017 if (decode_getfh(&xdr, &res->fh) != 0)
4750 goto out; 5018 goto out;
4751 if (decode_getfattr(&xdr, res->f_attr, res->server) != 0) 5019 if (decode_getfattr(&xdr, res->f_attr, res->server,
5020 !RPC_IS_ASYNC(rqstp->rq_task)) != 0)
4752 goto out; 5021 goto out;
4753 if (decode_restorefh(&xdr) != 0) 5022 if (decode_restorefh(&xdr) != 0)
4754 goto out; 5023 goto out;
4755 decode_getfattr(&xdr, res->dir_attr, res->server); 5024 decode_getfattr(&xdr, res->dir_attr, res->server,
5025 !RPC_IS_ASYNC(rqstp->rq_task));
4756out: 5026out:
4757 return status; 5027 return status;
4758} 5028}
@@ -4800,7 +5070,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, __be32 *p, struct nf
4800 status = decode_open(&xdr, res); 5070 status = decode_open(&xdr, res);
4801 if (status) 5071 if (status)
4802 goto out; 5072 goto out;
4803 decode_getfattr(&xdr, res->f_attr, res->server); 5073 decode_getfattr(&xdr, res->f_attr, res->server,
5074 !RPC_IS_ASYNC(rqstp->rq_task));
4804out: 5075out:
4805 return status; 5076 return status;
4806} 5077}
@@ -4827,7 +5098,8 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se
4827 status = decode_setattr(&xdr); 5098 status = decode_setattr(&xdr);
4828 if (status) 5099 if (status)
4829 goto out; 5100 goto out;
4830 decode_getfattr(&xdr, res->fattr, res->server); 5101 decode_getfattr(&xdr, res->fattr, res->server,
5102 !RPC_IS_ASYNC(rqstp->rq_task));
4831out: 5103out:
4832 return status; 5104 return status;
4833} 5105}
@@ -5001,7 +5273,8 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, __be32 *p, struct nfs_writ
5001 status = decode_write(&xdr, res); 5273 status = decode_write(&xdr, res);
5002 if (status) 5274 if (status)
5003 goto out; 5275 goto out;
5004 decode_getfattr(&xdr, res->fattr, res->server); 5276 decode_getfattr(&xdr, res->fattr, res->server,
5277 !RPC_IS_ASYNC(rqstp->rq_task));
5005 if (!status) 5278 if (!status)
5006 status = res->count; 5279 status = res->count;
5007out: 5280out:
@@ -5030,7 +5303,8 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, __be32 *p, struct nfs_wri
5030 status = decode_commit(&xdr, res); 5303 status = decode_commit(&xdr, res);
5031 if (status) 5304 if (status)
5032 goto out; 5305 goto out;
5033 decode_getfattr(&xdr, res->fattr, res->server); 5306 decode_getfattr(&xdr, res->fattr, res->server,
5307 !RPC_IS_ASYNC(rqstp->rq_task));
5034out: 5308out:
5035 return status; 5309 return status;
5036} 5310}
@@ -5194,7 +5468,8 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nf
5194 if (status != 0) 5468 if (status != 0)
5195 goto out; 5469 goto out;
5196 status = decode_delegreturn(&xdr); 5470 status = decode_delegreturn(&xdr);
5197 decode_getfattr(&xdr, res->fattr, res->server); 5471 decode_getfattr(&xdr, res->fattr, res->server,
5472 !RPC_IS_ASYNC(rqstp->rq_task));
5198out: 5473out:
5199 return status; 5474 return status;
5200} 5475}
@@ -5222,7 +5497,8 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p,
5222 goto out; 5497 goto out;
5223 xdr_enter_page(&xdr, PAGE_SIZE); 5498 xdr_enter_page(&xdr, PAGE_SIZE);
5224 status = decode_getfattr(&xdr, &res->fs_locations->fattr, 5499 status = decode_getfattr(&xdr, &res->fs_locations->fattr,
5225 res->fs_locations->server); 5500 res->fs_locations->server,
5501 !RPC_IS_ASYNC(req->rq_task));
5226out: 5502out:
5227 return status; 5503 return status;
5228} 5504}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0b4cbdc60abd..867f70504531 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -73,7 +73,7 @@ enum {
73 Opt_cto, Opt_nocto, 73 Opt_cto, Opt_nocto,
74 Opt_ac, Opt_noac, 74 Opt_ac, Opt_noac,
75 Opt_lock, Opt_nolock, 75 Opt_lock, Opt_nolock,
76 Opt_v2, Opt_v3, 76 Opt_v2, Opt_v3, Opt_v4,
77 Opt_udp, Opt_tcp, Opt_rdma, 77 Opt_udp, Opt_tcp, Opt_rdma,
78 Opt_acl, Opt_noacl, 78 Opt_acl, Opt_noacl,
79 Opt_rdirplus, Opt_nordirplus, 79 Opt_rdirplus, Opt_nordirplus,
@@ -127,6 +127,7 @@ static const match_table_t nfs_mount_option_tokens = {
127 { Opt_nolock, "nolock" }, 127 { Opt_nolock, "nolock" },
128 { Opt_v2, "v2" }, 128 { Opt_v2, "v2" },
129 { Opt_v3, "v3" }, 129 { Opt_v3, "v3" },
130 { Opt_v4, "v4" },
130 { Opt_udp, "udp" }, 131 { Opt_udp, "udp" },
131 { Opt_tcp, "tcp" }, 132 { Opt_tcp, "tcp" },
132 { Opt_rdma, "rdma" }, 133 { Opt_rdma, "rdma" },
@@ -158,7 +159,7 @@ static const match_table_t nfs_mount_option_tokens = {
158 { Opt_mountvers, "mountvers=%s" }, 159 { Opt_mountvers, "mountvers=%s" },
159 { Opt_nfsvers, "nfsvers=%s" }, 160 { Opt_nfsvers, "nfsvers=%s" },
160 { Opt_nfsvers, "vers=%s" }, 161 { Opt_nfsvers, "vers=%s" },
161 { Opt_minorversion, "minorversion=%u" }, 162 { Opt_minorversion, "minorversion=%s" },
162 163
163 { Opt_sec, "sec=%s" }, 164 { Opt_sec, "sec=%s" },
164 { Opt_proto, "proto=%s" }, 165 { Opt_proto, "proto=%s" },
@@ -272,6 +273,10 @@ static const struct super_operations nfs_sops = {
272}; 273};
273 274
274#ifdef CONFIG_NFS_V4 275#ifdef CONFIG_NFS_V4
276static int nfs4_validate_text_mount_data(void *options,
277 struct nfs_parsed_mount_data *args, const char *dev_name);
278static int nfs4_try_mount(int flags, const char *dev_name,
279 struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
275static int nfs4_get_sb(struct file_system_type *fs_type, 280static int nfs4_get_sb(struct file_system_type *fs_type,
276 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 281 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
277static int nfs4_remote_get_sb(struct file_system_type *fs_type, 282static int nfs4_remote_get_sb(struct file_system_type *fs_type,
@@ -742,127 +747,23 @@ static int nfs_verify_server_address(struct sockaddr *addr)
742 } 747 }
743 } 748 }
744 749
750 dfprintk(MOUNT, "NFS: Invalid IP address specified\n");
745 return 0; 751 return 0;
746} 752}
747 753
748static void nfs_parse_ipv4_address(char *string, size_t str_len,
749 struct sockaddr *sap, size_t *addr_len)
750{
751 struct sockaddr_in *sin = (struct sockaddr_in *)sap;
752 u8 *addr = (u8 *)&sin->sin_addr.s_addr;
753
754 if (str_len <= INET_ADDRSTRLEN) {
755 dfprintk(MOUNT, "NFS: parsing IPv4 address %*s\n",
756 (int)str_len, string);
757
758 sin->sin_family = AF_INET;
759 *addr_len = sizeof(*sin);
760 if (in4_pton(string, str_len, addr, '\0', NULL))
761 return;
762 }
763
764 sap->sa_family = AF_UNSPEC;
765 *addr_len = 0;
766}
767
768#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
769static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
770 const char *delim,
771 struct sockaddr_in6 *sin6)
772{
773 char *p;
774 size_t len;
775
776 if ((string + str_len) == delim)
777 return 1;
778
779 if (*delim != IPV6_SCOPE_DELIMITER)
780 return 0;
781
782 if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
783 return 0;
784
785 len = (string + str_len) - delim - 1;
786 p = kstrndup(delim + 1, len, GFP_KERNEL);
787 if (p) {
788 unsigned long scope_id = 0;
789 struct net_device *dev;
790
791 dev = dev_get_by_name(&init_net, p);
792 if (dev != NULL) {
793 scope_id = dev->ifindex;
794 dev_put(dev);
795 } else {
796 if (strict_strtoul(p, 10, &scope_id) == 0) {
797 kfree(p);
798 return 0;
799 }
800 }
801
802 kfree(p);
803
804 sin6->sin6_scope_id = scope_id;
805 dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id);
806 return 1;
807 }
808
809 return 0;
810}
811
812static void nfs_parse_ipv6_address(char *string, size_t str_len,
813 struct sockaddr *sap, size_t *addr_len)
814{
815 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
816 u8 *addr = (u8 *)&sin6->sin6_addr.in6_u;
817 const char *delim;
818
819 if (str_len <= INET6_ADDRSTRLEN) {
820 dfprintk(MOUNT, "NFS: parsing IPv6 address %*s\n",
821 (int)str_len, string);
822
823 sin6->sin6_family = AF_INET6;
824 *addr_len = sizeof(*sin6);
825 if (in6_pton(string, str_len, addr,
826 IPV6_SCOPE_DELIMITER, &delim) != 0) {
827 if (nfs_parse_ipv6_scope_id(string, str_len,
828 delim, sin6) != 0)
829 return;
830 }
831 }
832
833 sap->sa_family = AF_UNSPEC;
834 *addr_len = 0;
835}
836#else
837static void nfs_parse_ipv6_address(char *string, size_t str_len,
838 struct sockaddr *sap, size_t *addr_len)
839{
840 sap->sa_family = AF_UNSPEC;
841 *addr_len = 0;
842}
843#endif
844
845/* 754/*
846 * Construct a sockaddr based on the contents of a string that contains 755 * Select between a default port value and a user-specified port value.
847 * an IP address in presentation format. 756 * If a zero value is set, then autobind will be used.
848 *
849 * If there is a problem constructing the new sockaddr, set the address
850 * family to AF_UNSPEC.
851 */ 757 */
852void nfs_parse_ip_address(char *string, size_t str_len, 758static void nfs_set_default_port(struct sockaddr *sap, const int parsed_port,
853 struct sockaddr *sap, size_t *addr_len) 759 const unsigned short default_port)
854{ 760{
855 unsigned int i, colons; 761 unsigned short port = default_port;
856 762
857 colons = 0; 763 if (parsed_port != NFS_UNSPEC_PORT)
858 for (i = 0; i < str_len; i++) 764 port = parsed_port;
859 if (string[i] == ':')
860 colons++;
861 765
862 if (colons >= 2) 766 rpc_set_port(sap, port);
863 nfs_parse_ipv6_address(string, str_len, sap, addr_len);
864 else
865 nfs_parse_ipv4_address(string, str_len, sap, addr_len);
866} 767}
867 768
868/* 769/*
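
nfs_set_default_port() centralizes the port fix-up: a port given on the command line wins, otherwise the supplied default is used, and a value of zero lets the RPC client autobind. A hedged usage sketch; the caller shown is illustrative, NFS_PORT is the standard 2049 from <linux/nfs.h>:

    struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;

    /* Use "port=" from the options if given, else the standard NFS port. */
    nfs_set_default_port(sap, args->nfs_server.port, NFS_PORT);
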
@@ -904,8 +805,6 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt)
904 805
905/* 806/*
906 * Parse the value of the 'sec=' option. 807 * Parse the value of the 'sec=' option.
907 *
908 * The flavor_len setting is for v4 mounts.
909 */ 808 */
910static int nfs_parse_security_flavors(char *value, 809static int nfs_parse_security_flavors(char *value,
911 struct nfs_parsed_mount_data *mnt) 810 struct nfs_parsed_mount_data *mnt)
@@ -916,53 +815,43 @@ static int nfs_parse_security_flavors(char *value,
916 815
917 switch (match_token(value, nfs_secflavor_tokens, args)) { 816 switch (match_token(value, nfs_secflavor_tokens, args)) {
918 case Opt_sec_none: 817 case Opt_sec_none:
919 mnt->auth_flavor_len = 0;
920 mnt->auth_flavors[0] = RPC_AUTH_NULL; 818 mnt->auth_flavors[0] = RPC_AUTH_NULL;
921 break; 819 break;
922 case Opt_sec_sys: 820 case Opt_sec_sys:
923 mnt->auth_flavor_len = 0;
924 mnt->auth_flavors[0] = RPC_AUTH_UNIX; 821 mnt->auth_flavors[0] = RPC_AUTH_UNIX;
925 break; 822 break;
926 case Opt_sec_krb5: 823 case Opt_sec_krb5:
927 mnt->auth_flavor_len = 1;
928 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5; 824 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5;
929 break; 825 break;
930 case Opt_sec_krb5i: 826 case Opt_sec_krb5i:
931 mnt->auth_flavor_len = 1;
932 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I; 827 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5I;
933 break; 828 break;
934 case Opt_sec_krb5p: 829 case Opt_sec_krb5p:
935 mnt->auth_flavor_len = 1;
936 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P; 830 mnt->auth_flavors[0] = RPC_AUTH_GSS_KRB5P;
937 break; 831 break;
938 case Opt_sec_lkey: 832 case Opt_sec_lkey:
939 mnt->auth_flavor_len = 1;
940 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY; 833 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEY;
941 break; 834 break;
942 case Opt_sec_lkeyi: 835 case Opt_sec_lkeyi:
943 mnt->auth_flavor_len = 1;
944 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI; 836 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYI;
945 break; 837 break;
946 case Opt_sec_lkeyp: 838 case Opt_sec_lkeyp:
947 mnt->auth_flavor_len = 1;
948 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP; 839 mnt->auth_flavors[0] = RPC_AUTH_GSS_LKEYP;
949 break; 840 break;
950 case Opt_sec_spkm: 841 case Opt_sec_spkm:
951 mnt->auth_flavor_len = 1;
952 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM; 842 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKM;
953 break; 843 break;
954 case Opt_sec_spkmi: 844 case Opt_sec_spkmi:
955 mnt->auth_flavor_len = 1;
956 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI; 845 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMI;
957 break; 846 break;
958 case Opt_sec_spkmp: 847 case Opt_sec_spkmp:
959 mnt->auth_flavor_len = 1;
960 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP; 848 mnt->auth_flavors[0] = RPC_AUTH_GSS_SPKMP;
961 break; 849 break;
962 default: 850 default:
963 return 0; 851 return 0;
964 } 852 }
965 853
854 mnt->auth_flavor_len = 1;
966 return 1; 855 return 1;
967} 856}
968 857
@@ -1001,7 +890,6 @@ static int nfs_parse_mount_options(char *raw,
1001 while ((p = strsep(&raw, ",")) != NULL) { 890 while ((p = strsep(&raw, ",")) != NULL) {
1002 substring_t args[MAX_OPT_ARGS]; 891 substring_t args[MAX_OPT_ARGS];
1003 unsigned long option; 892 unsigned long option;
1004 int int_option;
1005 int token; 893 int token;
1006 894
1007 if (!*p) 895 if (!*p)
@@ -1047,10 +935,18 @@ static int nfs_parse_mount_options(char *raw,
1047 break; 935 break;
1048 case Opt_v2: 936 case Opt_v2:
1049 mnt->flags &= ~NFS_MOUNT_VER3; 937 mnt->flags &= ~NFS_MOUNT_VER3;
938 mnt->version = 2;
1050 break; 939 break;
1051 case Opt_v3: 940 case Opt_v3:
1052 mnt->flags |= NFS_MOUNT_VER3; 941 mnt->flags |= NFS_MOUNT_VER3;
942 mnt->version = 3;
1053 break; 943 break;
944#ifdef CONFIG_NFS_V4
945 case Opt_v4:
946 mnt->flags &= ~NFS_MOUNT_VER3;
947 mnt->version = 4;
948 break;
949#endif
1054 case Opt_udp: 950 case Opt_udp:
1055 mnt->flags &= ~NFS_MOUNT_TCP; 951 mnt->flags &= ~NFS_MOUNT_TCP;
1056 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 952 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
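
Setting mnt->version here lets later code branch on the protocol version rather than on NFS_MOUNT_VER3 alone. An illustrative sketch of how a text-parsed "v4" mount might be dispatched, using only the helpers forward-declared earlier in this diff; the surrounding control flow is an assumption, not a copy of the in-tree code:

    /* Illustrative dispatch after option parsing (not the in-tree code path). */
    if (data->version == 4) {
            error = nfs4_validate_text_mount_data(options, data, dev_name);
            if (error == 0)
                    error = nfs4_try_mount(flags, dev_name, data, mnt);
    }
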
@@ -1264,20 +1160,33 @@ static int nfs_parse_mount_options(char *raw,
1264 switch (option) { 1160 switch (option) {
1265 case NFS2_VERSION: 1161 case NFS2_VERSION:
1266 mnt->flags &= ~NFS_MOUNT_VER3; 1162 mnt->flags &= ~NFS_MOUNT_VER3;
1163 mnt->version = 2;
1267 break; 1164 break;
1268 case NFS3_VERSION: 1165 case NFS3_VERSION:
1269 mnt->flags |= NFS_MOUNT_VER3; 1166 mnt->flags |= NFS_MOUNT_VER3;
1167 mnt->version = 3;
1270 break; 1168 break;
1169#ifdef CONFIG_NFS_V4
1170 case NFS4_VERSION:
1171 mnt->flags &= ~NFS_MOUNT_VER3;
1172 mnt->version = 4;
1173 break;
1174#endif
1271 default: 1175 default:
1272 goto out_invalid_value; 1176 goto out_invalid_value;
1273 } 1177 }
1274 break; 1178 break;
1275 case Opt_minorversion: 1179 case Opt_minorversion:
1276 if (match_int(args, &int_option)) 1180 string = match_strdup(args);
1277 return 0; 1181 if (string == NULL)
1278 if (int_option < 0 || int_option > NFS4_MAX_MINOR_VERSION) 1182 goto out_nomem;
1279 return 0; 1183 rc = strict_strtoul(string, 10, &option);
1280 mnt->minorversion = int_option; 1184 kfree(string);
1185 if (rc != 0)
1186 goto out_invalid_value;
1187 if (option > NFS4_MAX_MINOR_VERSION)
1188 goto out_invalid_value;
1189 mnt->minorversion = option;
1281 break; 1190 break;
1282 1191
1283 /* 1192 /*
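
minorversion= is now matched as a string and converted with strict_strtoul(), so malformed or out-of-range input reaches the common out_invalid_value error path instead of silently failing the whole parse. The same pattern as a standalone helper, purely as a hedged sketch; parse_bounded_uint is hypothetical:

    /* Parse an unsigned option value and enforce an upper bound. */
    static int parse_bounded_uint(substring_t *args, unsigned long max,
                                  unsigned long *result)
    {
            char *string;
            int rc;

            string = match_strdup(args);    /* copy the matched substring */
            if (string == NULL)
                    return -ENOMEM;
            rc = strict_strtoul(string, 10, result);
            kfree(string);
            if (rc != 0 || *result > max)
                    return -EINVAL;
            return 0;
    }
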
@@ -1352,11 +1261,14 @@ static int nfs_parse_mount_options(char *raw,
1352 string = match_strdup(args); 1261 string = match_strdup(args);
1353 if (string == NULL) 1262 if (string == NULL)
1354 goto out_nomem; 1263 goto out_nomem;
1355 nfs_parse_ip_address(string, strlen(string), 1264 mnt->nfs_server.addrlen =
1356 (struct sockaddr *) 1265 rpc_pton(string, strlen(string),
1357 &mnt->nfs_server.address, 1266 (struct sockaddr *)
1358 &mnt->nfs_server.addrlen); 1267 &mnt->nfs_server.address,
1268 sizeof(mnt->nfs_server.address));
1359 kfree(string); 1269 kfree(string);
1270 if (mnt->nfs_server.addrlen == 0)
1271 goto out_invalid_address;
1360 break; 1272 break;
1361 case Opt_clientaddr: 1273 case Opt_clientaddr:
1362 string = match_strdup(args); 1274 string = match_strdup(args);
@@ -1376,11 +1288,14 @@ static int nfs_parse_mount_options(char *raw,
1376 string = match_strdup(args); 1288 string = match_strdup(args);
1377 if (string == NULL) 1289 if (string == NULL)
1378 goto out_nomem; 1290 goto out_nomem;
1379 nfs_parse_ip_address(string, strlen(string), 1291 mnt->mount_server.addrlen =
1380 (struct sockaddr *) 1292 rpc_pton(string, strlen(string),
1381 &mnt->mount_server.address, 1293 (struct sockaddr *)
1382 &mnt->mount_server.addrlen); 1294 &mnt->mount_server.address,
1295 sizeof(mnt->mount_server.address));
1383 kfree(string); 1296 kfree(string);
1297 if (mnt->mount_server.addrlen == 0)
1298 goto out_invalid_address;
1384 break; 1299 break;
1385 case Opt_lookupcache: 1300 case Opt_lookupcache:
1386 string = match_strdup(args); 1301 string = match_strdup(args);
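Note on the two rpc_pton() hunks above (server address and Opt_mountaddr): the helper now returns the sockaddr length it filled in, with 0 signalling a parse failure, which is what lets the parser report a bad address via the new out_invalid_address label. The same "presentation string in, length out" contract can be modelled with inet_pton(); the helper below is an illustration, not the kernel's rpc_pton():

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>

/* Fill *sap from an IPv4/IPv6 presentation address; 0 means failure. */
static size_t pton_len(const char *str, struct sockaddr_storage *sap)
{
        struct sockaddr_in *sin = (struct sockaddr_in *)sap;
        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;

        memset(sap, 0, sizeof(*sap));
        if (inet_pton(AF_INET, str, &sin->sin_addr) == 1) {
                sin->sin_family = AF_INET;
                return sizeof(*sin);
        }
        if (inet_pton(AF_INET6, str, &sin6->sin6_addr) == 1) {
                sin6->sin6_family = AF_INET6;
                return sizeof(*sin6);
        }
        return 0;
}

int main(void)
{
        struct sockaddr_storage ss;

        printf("%zu\n", pton_len("192.168.0.1", &ss));     /* 16 on Linux */
        printf("%zu\n", pton_len("not-an-address", &ss));  /* 0 */
        return 0;
}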
@@ -1432,8 +1347,11 @@ static int nfs_parse_mount_options(char *raw,
1432 1347
1433 return 1; 1348 return 1;
1434 1349
1350out_invalid_address:
1351 printk(KERN_INFO "NFS: bad IP address specified: %s\n", p);
1352 return 0;
1435out_invalid_value: 1353out_invalid_value:
1436 printk(KERN_INFO "NFS: bad mount option value specified: %s \n", p); 1354 printk(KERN_INFO "NFS: bad mount option value specified: %s\n", p);
1437 return 0; 1355 return 0;
1438out_nomem: 1356out_nomem:
1439 printk(KERN_INFO "NFS: not enough memory to parse option\n"); 1357 printk(KERN_INFO "NFS: not enough memory to parse option\n");
@@ -1445,13 +1363,60 @@ out_security_failure:
1445} 1363}
1446 1364
1447/* 1365/*
1366 * Match the requested auth flavors with the list returned by
1367 * the server. Returns zero and sets the mount's authentication
1368 * flavor on success; returns -EACCES if server does not support
1369 * the requested flavor.
1370 */
1371static int nfs_walk_authlist(struct nfs_parsed_mount_data *args,
1372 struct nfs_mount_request *request)
1373{
1374 unsigned int i, j, server_authlist_len = *(request->auth_flav_len);
1375
1376 /*
1377 * Certain releases of Linux's mountd return an empty
1378 * flavor list. To prevent behavioral regression with
 1379	 * these servers (i.e. rejecting mounts that used to
1380 * succeed), revert to pre-2.6.32 behavior (no checking)
1381 * if the returned flavor list is empty.
1382 */
1383 if (server_authlist_len == 0)
1384 return 0;
1385
1386 /*
1387 * We avoid sophisticated negotiating here, as there are
1388 * plenty of cases where we can get it wrong, providing
1389 * either too little or too much security.
1390 *
1391 * RFC 2623, section 2.7 suggests we SHOULD prefer the
1392 * flavor listed first. However, some servers list
1393 * AUTH_NULL first. Our caller plants AUTH_SYS, the
1394 * preferred default, in args->auth_flavors[0] if user
1395 * didn't specify sec= mount option.
1396 */
1397 for (i = 0; i < args->auth_flavor_len; i++)
1398 for (j = 0; j < server_authlist_len; j++)
1399 if (args->auth_flavors[i] == request->auth_flavs[j]) {
1400 dfprintk(MOUNT, "NFS: using auth flavor %d\n",
1401 request->auth_flavs[j]);
1402 args->auth_flavors[0] = request->auth_flavs[j];
1403 return 0;
1404 }
1405
1406 dfprintk(MOUNT, "NFS: server does not support requested auth flavor\n");
1407 nfs_umount(request);
1408 return -EACCES;
1409}
1410
1411/*
1448 * Use the remote server's MOUNT service to request the NFS file handle 1412 * Use the remote server's MOUNT service to request the NFS file handle
1449 * corresponding to the provided path. 1413 * corresponding to the provided path.
1450 */ 1414 */
1451static int nfs_try_mount(struct nfs_parsed_mount_data *args, 1415static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1452 struct nfs_fh *root_fh) 1416 struct nfs_fh *root_fh)
1453{ 1417{
1454 unsigned int auth_flavor_len = 0; 1418 rpc_authflavor_t server_authlist[NFS_MAX_SECFLAVORS];
1419 unsigned int server_authlist_len = ARRAY_SIZE(server_authlist);
1455 struct nfs_mount_request request = { 1420 struct nfs_mount_request request = {
1456 .sap = (struct sockaddr *) 1421 .sap = (struct sockaddr *)
1457 &args->mount_server.address, 1422 &args->mount_server.address,
@@ -1459,7 +1424,8 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1459 .protocol = args->mount_server.protocol, 1424 .protocol = args->mount_server.protocol,
1460 .fh = root_fh, 1425 .fh = root_fh,
1461 .noresvport = args->flags & NFS_MOUNT_NORESVPORT, 1426 .noresvport = args->flags & NFS_MOUNT_NORESVPORT,
1462 .auth_flav_len = &auth_flavor_len, 1427 .auth_flav_len = &server_authlist_len,
1428 .auth_flavs = server_authlist,
1463 }; 1429 };
1464 int status; 1430 int status;
1465 1431
@@ -1485,23 +1451,25 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1485 args->mount_server.addrlen = args->nfs_server.addrlen; 1451 args->mount_server.addrlen = args->nfs_server.addrlen;
1486 } 1452 }
1487 request.salen = args->mount_server.addrlen; 1453 request.salen = args->mount_server.addrlen;
1488 1454 nfs_set_default_port(request.sap, args->mount_server.port, 0);
1489 /*
1490 * autobind will be used if mount_server.port == 0
1491 */
1492 nfs_set_port(request.sap, args->mount_server.port);
1493 1455
1494 /* 1456 /*
1495 * Now ask the mount server to map our export path 1457 * Now ask the mount server to map our export path
1496 * to a file handle. 1458 * to a file handle.
1497 */ 1459 */
1498 status = nfs_mount(&request); 1460 status = nfs_mount(&request);
1499 if (status == 0) 1461 if (status != 0) {
1500 return 0; 1462 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
1463 request.hostname, status);
1464 return status;
1465 }
1501 1466
1502 dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n", 1467 /*
1503 request.hostname, status); 1468 * MNTv1 (NFSv2) does not support auth flavor negotiation.
1504 return status; 1469 */
1470 if (args->mount_server.version != NFS_MNT3_VERSION)
1471 return 0;
1472 return nfs_walk_authlist(args, &request);
1505} 1473}
1506 1474
1507static int nfs_parse_simple_hostname(const char *dev_name, 1475static int nfs_parse_simple_hostname(const char *dev_name,
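Note on the nfs_walk_authlist() addition above: the client walks its own requested flavors in order and accepts the first one the server also advertises, treating an empty server list as "no checking" to stay compatible with older mountd releases. A minimal standalone sketch of that matching rule (plain C; the constants and names below are illustrative stand-ins for the kernel's rpc_authflavor_t plumbing):

#include <stdio.h>

/* Illustrative stand-ins for a few RPC auth flavor numbers. */
enum { AUTH_NULL = 0, AUTH_SYS = 1, AUTH_GSS_KRB5 = 390003 };

/*
 * Return the first client-requested flavor the server also lists,
 * the first request unchanged if the server sent no list at all,
 * or -1 when there is no overlap (the -EACCES case in the hunk).
 */
static int pick_auth_flavor(const unsigned int *want, unsigned int nwant,
                            const unsigned int *avail, unsigned int navail)
{
        unsigned int i, j;

        if (navail == 0)
                return want[0];
        for (i = 0; i < nwant; i++)
                for (j = 0; j < navail; j++)
                        if (want[i] == avail[j])
                                return want[i];
        return -1;
}

int main(void)
{
        unsigned int want[] = { AUTH_SYS };
        unsigned int avail[] = { AUTH_NULL, AUTH_SYS, AUTH_GSS_KRB5 };

        printf("chosen flavor: %d\n", pick_auth_flavor(want, 1, avail, 3));
        return 0;
}

In the kernel version a mismatch additionally calls nfs_umount() before returning -EACCES, so the MNT request that just succeeded is undone on the server.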
@@ -1661,6 +1629,7 @@ static int nfs_validate_mount_data(void *options,
1661 const char *dev_name) 1629 const char *dev_name)
1662{ 1630{
1663 struct nfs_mount_data *data = (struct nfs_mount_data *)options; 1631 struct nfs_mount_data *data = (struct nfs_mount_data *)options;
1632 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
1664 1633
1665 if (data == NULL) 1634 if (data == NULL)
1666 goto out_no_data; 1635 goto out_no_data;
@@ -1672,10 +1641,12 @@ static int nfs_validate_mount_data(void *options,
1672 args->acregmax = NFS_DEF_ACREGMAX; 1641 args->acregmax = NFS_DEF_ACREGMAX;
1673 args->acdirmin = NFS_DEF_ACDIRMIN; 1642 args->acdirmin = NFS_DEF_ACDIRMIN;
1674 args->acdirmax = NFS_DEF_ACDIRMAX; 1643 args->acdirmax = NFS_DEF_ACDIRMAX;
1675 args->mount_server.port = 0; /* autobind unless user sets port */ 1644 args->mount_server.port = NFS_UNSPEC_PORT;
1676 args->nfs_server.port = 0; /* autobind unless user sets port */ 1645 args->nfs_server.port = NFS_UNSPEC_PORT;
1677 args->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1646 args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1678 args->auth_flavors[0] = RPC_AUTH_UNIX; 1647 args->auth_flavors[0] = RPC_AUTH_UNIX;
1648 args->auth_flavor_len = 1;
1649 args->minorversion = 0;
1679 1650
1680 switch (data->version) { 1651 switch (data->version) {
1681 case 1: 1652 case 1:
@@ -1697,8 +1668,11 @@ static int nfs_validate_mount_data(void *options,
1697 if (data->root.size > NFS3_FHSIZE || data->root.size == 0) 1668 if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
1698 goto out_invalid_fh; 1669 goto out_invalid_fh;
1699 mntfh->size = data->root.size; 1670 mntfh->size = data->root.size;
1700 } else 1671 args->version = 3;
1672 } else {
1701 mntfh->size = NFS2_FHSIZE; 1673 mntfh->size = NFS2_FHSIZE;
1674 args->version = 2;
1675 }
1702 1676
1703 1677
1704 memcpy(mntfh->data, data->root.data, mntfh->size); 1678 memcpy(mntfh->data, data->root.data, mntfh->size);
@@ -1720,11 +1694,9 @@ static int nfs_validate_mount_data(void *options,
1720 args->acdirmin = data->acdirmin; 1694 args->acdirmin = data->acdirmin;
1721 args->acdirmax = data->acdirmax; 1695 args->acdirmax = data->acdirmax;
1722 1696
1723 memcpy(&args->nfs_server.address, &data->addr, 1697 memcpy(sap, &data->addr, sizeof(data->addr));
1724 sizeof(data->addr));
1725 args->nfs_server.addrlen = sizeof(data->addr); 1698 args->nfs_server.addrlen = sizeof(data->addr);
1726 if (!nfs_verify_server_address((struct sockaddr *) 1699 if (!nfs_verify_server_address(sap))
1727 &args->nfs_server.address))
1728 goto out_no_address; 1700 goto out_no_address;
1729 1701
1730 if (!(data->flags & NFS_MOUNT_TCP)) 1702 if (!(data->flags & NFS_MOUNT_TCP))
@@ -1772,12 +1744,18 @@ static int nfs_validate_mount_data(void *options,
1772 if (nfs_parse_mount_options((char *)options, args) == 0) 1744 if (nfs_parse_mount_options((char *)options, args) == 0)
1773 return -EINVAL; 1745 return -EINVAL;
1774 1746
1775 if (!nfs_verify_server_address((struct sockaddr *) 1747 if (!nfs_verify_server_address(sap))
1776 &args->nfs_server.address))
1777 goto out_no_address; 1748 goto out_no_address;
1778 1749
1779 nfs_set_port((struct sockaddr *)&args->nfs_server.address, 1750 if (args->version == 4)
1780 args->nfs_server.port); 1751#ifdef CONFIG_NFS_V4
1752 return nfs4_validate_text_mount_data(options,
1753 args, dev_name);
1754#else
1755 goto out_v4_not_compiled;
1756#endif
1757
1758 nfs_set_default_port(sap, args->nfs_server.port, 0);
1781 1759
1782 nfs_set_mount_transport_protocol(args); 1760 nfs_set_mount_transport_protocol(args);
1783 1761
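Note on the port handling above: the old nfs_set_port() calls and hard-coded defaults give way to nfs_set_default_port(sap, port, default), with NFS_UNSPEC_PORT marking "no port= option was given". The rule is simply that a user-specified port wins and the default (0 meaning RPC autobind) applies otherwise; an AF_INET-only sketch, with the sentinel value assumed to be -1:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>

#define NFS_UNSPEC_PORT (-1)    /* assumed sentinel: no port= given */
#define NFS_PORT        2049

/* A user-supplied port wins; otherwise fall back to the default. */
static void set_default_port(struct sockaddr_in *sin,
                             int user_port, unsigned short defport)
{
        unsigned short port = (user_port == NFS_UNSPEC_PORT) ?
                                defport : (unsigned short)user_port;

        sin->sin_port = htons(port);
}

int main(void)
{
        struct sockaddr_in sin = { .sin_family = AF_INET };

        set_default_port(&sin, NFS_UNSPEC_PORT, NFS_PORT);
        printf("port = %u\n", ntohs(sin.sin_port));     /* 2049  */
        set_default_port(&sin, 20049, NFS_PORT);
        printf("port = %u\n", ntohs(sin.sin_port));     /* 20049 */
        return 0;
}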
@@ -1825,6 +1803,12 @@ out_v3_not_compiled:
1825 return -EPROTONOSUPPORT; 1803 return -EPROTONOSUPPORT;
1826#endif /* !CONFIG_NFS_V3 */ 1804#endif /* !CONFIG_NFS_V3 */
1827 1805
1806#ifndef CONFIG_NFS_V4
1807out_v4_not_compiled:
1808 dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
1809 return -EPROTONOSUPPORT;
1810#endif /* !CONFIG_NFS_V4 */
1811
1828out_nomem: 1812out_nomem:
1829 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n"); 1813 dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
1830 return -ENOMEM; 1814 return -ENOMEM;
@@ -2120,6 +2104,14 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2120 if (error < 0) 2104 if (error < 0)
2121 goto out; 2105 goto out;
2122 2106
2107#ifdef CONFIG_NFS_V4
2108 if (data->version == 4) {
2109 error = nfs4_try_mount(flags, dev_name, data, mnt);
2110 kfree(data->client_address);
2111 goto out;
2112 }
2113#endif /* CONFIG_NFS_V4 */
2114
2123 /* Get a volume representation */ 2115 /* Get a volume representation */
2124 server = nfs_create_server(data, mntfh); 2116 server = nfs_create_server(data, mntfh);
2125 if (IS_ERR(server)) { 2117 if (IS_ERR(server)) {
@@ -2317,6 +2309,43 @@ static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2317 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); 2309 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3);
2318} 2310}
2319 2311
2312static int nfs4_validate_text_mount_data(void *options,
2313 struct nfs_parsed_mount_data *args,
2314 const char *dev_name)
2315{
2316 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2317
2318 nfs_set_default_port(sap, args->nfs_server.port, NFS_PORT);
2319
2320 nfs_validate_transport_protocol(args);
2321
2322 nfs4_validate_mount_flags(args);
2323
2324 if (args->version != 4) {
2325 dfprintk(MOUNT,
2326 "NFS4: Illegal mount version\n");
2327 return -EINVAL;
2328 }
2329
2330 if (args->auth_flavor_len > 1) {
2331 dfprintk(MOUNT,
2332 "NFS4: Too many RPC auth flavours specified\n");
2333 return -EINVAL;
2334 }
2335
2336 if (args->client_address == NULL) {
2337 dfprintk(MOUNT,
2338 "NFS4: mount program didn't pass callback address\n");
2339 return -EINVAL;
2340 }
2341
2342 return nfs_parse_devname(dev_name,
2343 &args->nfs_server.hostname,
2344 NFS4_MAXNAMLEN,
2345 &args->nfs_server.export_path,
2346 NFS4_MAXPATHLEN);
2347}
2348
2320/* 2349/*
2321 * Validate NFSv4 mount options 2350 * Validate NFSv4 mount options
2322 */ 2351 */
@@ -2324,7 +2353,7 @@ static int nfs4_validate_mount_data(void *options,
2324 struct nfs_parsed_mount_data *args, 2353 struct nfs_parsed_mount_data *args,
2325 const char *dev_name) 2354 const char *dev_name)
2326{ 2355{
2327 struct sockaddr_in *ap; 2356 struct sockaddr *sap = (struct sockaddr *)&args->nfs_server.address;
2328 struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; 2357 struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
2329 char *c; 2358 char *c;
2330 2359
@@ -2337,23 +2366,22 @@ static int nfs4_validate_mount_data(void *options,
2337 args->acregmax = NFS_DEF_ACREGMAX; 2366 args->acregmax = NFS_DEF_ACREGMAX;
2338 args->acdirmin = NFS_DEF_ACDIRMIN; 2367 args->acdirmin = NFS_DEF_ACDIRMIN;
2339 args->acdirmax = NFS_DEF_ACDIRMAX; 2368 args->acdirmax = NFS_DEF_ACDIRMAX;
2340 args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */ 2369 args->nfs_server.port = NFS_UNSPEC_PORT;
2341 args->auth_flavors[0] = RPC_AUTH_UNIX; 2370 args->auth_flavors[0] = RPC_AUTH_UNIX;
2342 args->auth_flavor_len = 0; 2371 args->auth_flavor_len = 1;
2372 args->version = 4;
2343 args->minorversion = 0; 2373 args->minorversion = 0;
2344 2374
2345 switch (data->version) { 2375 switch (data->version) {
2346 case 1: 2376 case 1:
2347 ap = (struct sockaddr_in *)&args->nfs_server.address;
2348 if (data->host_addrlen > sizeof(args->nfs_server.address)) 2377 if (data->host_addrlen > sizeof(args->nfs_server.address))
2349 goto out_no_address; 2378 goto out_no_address;
2350 if (data->host_addrlen == 0) 2379 if (data->host_addrlen == 0)
2351 goto out_no_address; 2380 goto out_no_address;
2352 args->nfs_server.addrlen = data->host_addrlen; 2381 args->nfs_server.addrlen = data->host_addrlen;
2353 if (copy_from_user(ap, data->host_addr, data->host_addrlen)) 2382 if (copy_from_user(sap, data->host_addr, data->host_addrlen))
2354 return -EFAULT; 2383 return -EFAULT;
2355 if (!nfs_verify_server_address((struct sockaddr *) 2384 if (!nfs_verify_server_address(sap))
2356 &args->nfs_server.address))
2357 goto out_no_address; 2385 goto out_no_address;
2358 2386
2359 if (data->auth_flavourlen) { 2387 if (data->auth_flavourlen) {
@@ -2399,39 +2427,14 @@ static int nfs4_validate_mount_data(void *options,
2399 nfs_validate_transport_protocol(args); 2427 nfs_validate_transport_protocol(args);
2400 2428
2401 break; 2429 break;
2402 default: { 2430 default:
2403 int status;
2404
2405 if (nfs_parse_mount_options((char *)options, args) == 0) 2431 if (nfs_parse_mount_options((char *)options, args) == 0)
2406 return -EINVAL; 2432 return -EINVAL;
2407 2433
2408 if (!nfs_verify_server_address((struct sockaddr *) 2434 if (!nfs_verify_server_address(sap))
2409 &args->nfs_server.address))
2410 return -EINVAL; 2435 return -EINVAL;
2411 2436
2412 nfs_set_port((struct sockaddr *)&args->nfs_server.address, 2437 return nfs4_validate_text_mount_data(options, args, dev_name);
2413 args->nfs_server.port);
2414
2415 nfs_validate_transport_protocol(args);
2416
2417 nfs4_validate_mount_flags(args);
2418
2419 if (args->auth_flavor_len > 1)
2420 goto out_inval_auth;
2421
2422 if (args->client_address == NULL)
2423 goto out_no_client_address;
2424
2425 status = nfs_parse_devname(dev_name,
2426 &args->nfs_server.hostname,
2427 NFS4_MAXNAMLEN,
2428 &args->nfs_server.export_path,
2429 NFS4_MAXPATHLEN);
2430 if (status < 0)
2431 return status;
2432
2433 break;
2434 }
2435 } 2438 }
2436 2439
2437 return 0; 2440 return 0;
@@ -2448,10 +2451,6 @@ out_inval_auth:
2448out_no_address: 2451out_no_address:
2449 dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n"); 2452 dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
2450 return -EINVAL; 2453 return -EINVAL;
2451
2452out_no_client_address:
2453 dfprintk(MOUNT, "NFS4: mount program didn't pass callback address\n");
2454 return -EINVAL;
2455} 2454}
2456 2455
2457/* 2456/*
@@ -2618,6 +2617,34 @@ out_err:
2618 return ret; 2617 return ret;
2619} 2618}
2620 2619
2620static int nfs4_try_mount(int flags, const char *dev_name,
2621 struct nfs_parsed_mount_data *data,
2622 struct vfsmount *mnt)
2623{
2624 char *export_path;
2625 struct vfsmount *root_mnt;
2626 int error;
2627
2628 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2629
2630 export_path = data->nfs_server.export_path;
2631 data->nfs_server.export_path = "/";
2632 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
2633 data->nfs_server.hostname);
2634 data->nfs_server.export_path = export_path;
2635
2636 error = PTR_ERR(root_mnt);
2637 if (IS_ERR(root_mnt))
2638 goto out;
2639
2640 error = nfs_follow_remote_path(root_mnt, export_path, mnt);
2641
2642out:
2643 dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", error,
2644 error != 0 ? " [error]" : "");
2645 return error;
2646}
2647
2621/* 2648/*
2622 * Get the superblock for an NFS4 mountpoint 2649 * Get the superblock for an NFS4 mountpoint
2623 */ 2650 */
@@ -2625,8 +2652,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2625 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2652 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
2626{ 2653{
2627 struct nfs_parsed_mount_data *data; 2654 struct nfs_parsed_mount_data *data;
2628 char *export_path;
2629 struct vfsmount *root_mnt;
2630 int error = -ENOMEM; 2655 int error = -ENOMEM;
2631 2656
2632 data = kzalloc(sizeof(*data), GFP_KERNEL); 2657 data = kzalloc(sizeof(*data), GFP_KERNEL);
@@ -2638,17 +2663,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2638 if (error < 0) 2663 if (error < 0)
2639 goto out; 2664 goto out;
2640 2665
2641 export_path = data->nfs_server.export_path; 2666 error = nfs4_try_mount(flags, dev_name, data, mnt);
2642 data->nfs_server.export_path = "/";
2643 root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, data,
2644 data->nfs_server.hostname);
2645 data->nfs_server.export_path = export_path;
2646
2647 error = PTR_ERR(root_mnt);
2648 if (IS_ERR(root_mnt))
2649 goto out;
2650
2651 error = nfs_follow_remote_path(root_mnt, export_path, mnt);
2652 2667
2653out: 2668out:
2654 kfree(data->client_address); 2669 kfree(data->client_address);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index a34fae21fe10..120acadc6a84 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -13,6 +13,7 @@
13#include <linux/file.h> 13#include <linux/file.h>
14#include <linux/writeback.h> 14#include <linux/writeback.h>
15#include <linux/swap.h> 15#include <linux/swap.h>
16#include <linux/migrate.h>
16 17
17#include <linux/sunrpc/clnt.h> 18#include <linux/sunrpc/clnt.h>
18#include <linux/nfs_fs.h> 19#include <linux/nfs_fs.h>
@@ -26,6 +27,7 @@
26#include "internal.h" 27#include "internal.h"
27#include "iostat.h" 28#include "iostat.h"
28#include "nfs4_fs.h" 29#include "nfs4_fs.h"
30#include "fscache.h"
29 31
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 32#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 33
@@ -218,24 +220,17 @@ static void nfs_end_page_writeback(struct page *page)
218 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); 220 clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
219} 221}
220 222
221/* 223static struct nfs_page *nfs_find_and_lock_request(struct page *page)
222 * Find an associated nfs write request, and prepare to flush it out
223 * May return an error if the user signalled nfs_wait_on_request().
224 */
225static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
226 struct page *page)
227{ 224{
228 struct inode *inode = page->mapping->host; 225 struct inode *inode = page->mapping->host;
229 struct nfs_page *req; 226 struct nfs_page *req;
230 int ret; 227 int ret;
231 228
232 spin_lock(&inode->i_lock); 229 spin_lock(&inode->i_lock);
233 for(;;) { 230 for (;;) {
234 req = nfs_page_find_request_locked(page); 231 req = nfs_page_find_request_locked(page);
235 if (req == NULL) { 232 if (req == NULL)
236 spin_unlock(&inode->i_lock); 233 break;
237 return 0;
238 }
239 if (nfs_set_page_tag_locked(req)) 234 if (nfs_set_page_tag_locked(req))
240 break; 235 break;
241 /* Note: If we hold the page lock, as is the case in nfs_writepage, 236 /* Note: If we hold the page lock, as is the case in nfs_writepage,
@@ -247,23 +242,40 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
247 ret = nfs_wait_on_request(req); 242 ret = nfs_wait_on_request(req);
248 nfs_release_request(req); 243 nfs_release_request(req);
249 if (ret != 0) 244 if (ret != 0)
250 return ret; 245 return ERR_PTR(ret);
251 spin_lock(&inode->i_lock); 246 spin_lock(&inode->i_lock);
252 } 247 }
253 if (test_bit(PG_CLEAN, &req->wb_flags)) {
254 spin_unlock(&inode->i_lock);
255 BUG();
256 }
257 if (nfs_set_page_writeback(page) != 0) {
258 spin_unlock(&inode->i_lock);
259 BUG();
260 }
261 spin_unlock(&inode->i_lock); 248 spin_unlock(&inode->i_lock);
249 return req;
250}
251
252/*
253 * Find an associated nfs write request, and prepare to flush it out
254 * May return an error if the user signalled nfs_wait_on_request().
255 */
256static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
257 struct page *page)
258{
259 struct nfs_page *req;
260 int ret = 0;
261
262 req = nfs_find_and_lock_request(page);
263 if (!req)
264 goto out;
265 ret = PTR_ERR(req);
266 if (IS_ERR(req))
267 goto out;
268
269 ret = nfs_set_page_writeback(page);
270 BUG_ON(ret != 0);
271 BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));
272
262 if (!nfs_pageio_add_request(pgio, req)) { 273 if (!nfs_pageio_add_request(pgio, req)) {
263 nfs_redirty_request(req); 274 nfs_redirty_request(req);
264 return pgio->pg_error; 275 ret = pgio->pg_error;
265 } 276 }
266 return 0; 277out:
278 return ret;
267} 279}
268 280
269static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) 281static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
@@ -1580,6 +1592,41 @@ int nfs_wb_page(struct inode *inode, struct page* page)
1580 return nfs_wb_page_priority(inode, page, FLUSH_STABLE); 1592 return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
1581} 1593}
1582 1594
1595#ifdef CONFIG_MIGRATION
1596int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1597 struct page *page)
1598{
1599 struct nfs_page *req;
1600 int ret;
1601
1602 if (PageFsCache(page))
1603 nfs_fscache_release_page(page, GFP_KERNEL);
1604
1605 req = nfs_find_and_lock_request(page);
1606 ret = PTR_ERR(req);
1607 if (IS_ERR(req))
1608 goto out;
1609
1610 ret = migrate_page(mapping, newpage, page);
1611 if (!req)
1612 goto out;
1613 if (ret)
1614 goto out_unlock;
1615 page_cache_get(newpage);
1616 req->wb_page = newpage;
1617 SetPagePrivate(newpage);
1618 set_page_private(newpage, page_private(page));
1619 ClearPagePrivate(page);
1620 set_page_private(page, 0);
1621 page_cache_release(page);
1622out_unlock:
1623 nfs_clear_page_tag_locked(req);
1624 nfs_release_request(req);
1625out:
1626 return ret;
1627}
1628#endif
1629
1583int __init nfs_init_writepagecache(void) 1630int __init nfs_init_writepagecache(void)
1584{ 1631{
1585 nfs_wdata_cachep = kmem_cache_create("nfs_write_data", 1632 nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
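Note on the write.c refactor above: nfs_find_and_lock_request() folds the old int protocol into a single pointer return — NULL for "no request on this page", ERR_PTR(ret) when nfs_wait_on_request() was interrupted, otherwise the locked request — and both nfs_page_async_flush() and the new nfs_migrate_page() decode it with IS_ERR()/PTR_ERR(). A self-contained model of that convention (the ERR_PTR macros are re-implemented below for illustration; in the kernel they come from include/linux/err.h):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_ERRNO       4095
#define ERR_PTR(err)    ((void *)(long)(err))
#define PTR_ERR(ptr)    ((long)(ptr))
#define IS_ERR(ptr)     ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

struct request { char name[16]; };

/* NULL: nothing to do; ERR_PTR(-EINTR): interrupted; else: a request. */
static struct request *find_and_lock_request(int scenario)
{
        struct request *req;

        switch (scenario) {
        case 0:
                return NULL;
        case 1:
                return ERR_PTR(-4 /* EINTR */);
        default:
                req = malloc(sizeof(*req));
                strcpy(req->name, "dirty-page");
                return req;
        }
}

int main(void)
{
        for (int s = 0; s < 3; s++) {
                struct request *req = find_and_lock_request(s);

                if (!req)
                        printf("scenario %d: nothing to flush\n", s);
                else if (IS_ERR(req))
                        printf("scenario %d: error %ld\n", s, PTR_ERR(req));
                else {
                        printf("scenario %d: got %s\n", s, req->name);
                        free(req);
                }
        }
        return 0;
}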
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 5573508f707f..36fcabbf5186 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -34,6 +34,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
34 int flags = nfsexp_flags(rqstp, exp); 34 int flags = nfsexp_flags(rqstp, exp);
35 int ret; 35 int ret;
36 36
37 validate_process_creds();
38
37 /* discard any old override before preparing the new set */ 39 /* discard any old override before preparing the new set */
38 revert_creds(get_cred(current->real_cred)); 40 revert_creds(get_cred(current->real_cred));
39 new = prepare_creds(); 41 new = prepare_creds();
@@ -86,8 +88,10 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
86 else 88 else
87 new->cap_effective = cap_raise_nfsd_set(new->cap_effective, 89 new->cap_effective = cap_raise_nfsd_set(new->cap_effective,
88 new->cap_permitted); 90 new->cap_permitted);
91 validate_process_creds();
89 put_cred(override_creds(new)); 92 put_cred(override_creds(new));
90 put_cred(new); 93 put_cred(new);
94 validate_process_creds();
91 return 0; 95 return 0;
92 96
93oom: 97oom:
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index b92a27629fb7..d9462643155c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -85,6 +85,11 @@ static void expkey_request(struct cache_detail *cd,
85 (*bpp)[-1] = '\n'; 85 (*bpp)[-1] = '\n';
86} 86}
87 87
88static int expkey_upcall(struct cache_detail *cd, struct cache_head *h)
89{
90 return sunrpc_cache_pipe_upcall(cd, h, expkey_request);
91}
92
88static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); 93static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old);
89static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); 94static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *);
90static struct cache_detail svc_expkey_cache; 95static struct cache_detail svc_expkey_cache;
@@ -259,7 +264,7 @@ static struct cache_detail svc_expkey_cache = {
259 .hash_table = expkey_table, 264 .hash_table = expkey_table,
260 .name = "nfsd.fh", 265 .name = "nfsd.fh",
261 .cache_put = expkey_put, 266 .cache_put = expkey_put,
262 .cache_request = expkey_request, 267 .cache_upcall = expkey_upcall,
263 .cache_parse = expkey_parse, 268 .cache_parse = expkey_parse,
264 .cache_show = expkey_show, 269 .cache_show = expkey_show,
265 .match = expkey_match, 270 .match = expkey_match,
@@ -355,6 +360,11 @@ static void svc_export_request(struct cache_detail *cd,
355 (*bpp)[-1] = '\n'; 360 (*bpp)[-1] = '\n';
356} 361}
357 362
363static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h)
364{
365 return sunrpc_cache_pipe_upcall(cd, h, svc_export_request);
366}
367
358static struct svc_export *svc_export_update(struct svc_export *new, 368static struct svc_export *svc_export_update(struct svc_export *new,
359 struct svc_export *old); 369 struct svc_export *old);
360static struct svc_export *svc_export_lookup(struct svc_export *); 370static struct svc_export *svc_export_lookup(struct svc_export *);
@@ -724,7 +734,7 @@ struct cache_detail svc_export_cache = {
724 .hash_table = export_table, 734 .hash_table = export_table,
725 .name = "nfsd.export", 735 .name = "nfsd.export",
726 .cache_put = svc_export_put, 736 .cache_put = svc_export_put,
727 .cache_request = svc_export_request, 737 .cache_upcall = svc_export_upcall,
728 .cache_parse = svc_export_parse, 738 .cache_parse = svc_export_parse,
729 .cache_show = svc_export_show, 739 .cache_show = svc_export_show,
730 .match = svc_export_match, 740 .match = svc_export_match,
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5b398421b051..cdfa86fa1471 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -146,6 +146,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
146} 146}
147 147
148static int 148static int
149idtoname_upcall(struct cache_detail *cd, struct cache_head *ch)
150{
151 return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request);
152}
153
154static int
149idtoname_match(struct cache_head *ca, struct cache_head *cb) 155idtoname_match(struct cache_head *ca, struct cache_head *cb)
150{ 156{
151 struct ent *a = container_of(ca, struct ent, h); 157 struct ent *a = container_of(ca, struct ent, h);
@@ -175,10 +181,10 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
175} 181}
176 182
177static void 183static void
178warn_no_idmapd(struct cache_detail *detail) 184warn_no_idmapd(struct cache_detail *detail, int has_died)
179{ 185{
180 printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", 186 printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n",
181 detail->last_close? "died" : "not been started"); 187 has_died ? "died" : "not been started");
182} 188}
183 189
184 190
@@ -192,7 +198,7 @@ static struct cache_detail idtoname_cache = {
192 .hash_table = idtoname_table, 198 .hash_table = idtoname_table,
193 .name = "nfs4.idtoname", 199 .name = "nfs4.idtoname",
194 .cache_put = ent_put, 200 .cache_put = ent_put,
195 .cache_request = idtoname_request, 201 .cache_upcall = idtoname_upcall,
196 .cache_parse = idtoname_parse, 202 .cache_parse = idtoname_parse,
197 .cache_show = idtoname_show, 203 .cache_show = idtoname_show,
198 .warn_no_listener = warn_no_idmapd, 204 .warn_no_listener = warn_no_idmapd,
@@ -325,6 +331,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
325} 331}
326 332
327static int 333static int
334nametoid_upcall(struct cache_detail *cd, struct cache_head *ch)
335{
336 return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request);
337}
338
339static int
328nametoid_match(struct cache_head *ca, struct cache_head *cb) 340nametoid_match(struct cache_head *ca, struct cache_head *cb)
329{ 341{
330 struct ent *a = container_of(ca, struct ent, h); 342 struct ent *a = container_of(ca, struct ent, h);
@@ -363,7 +375,7 @@ static struct cache_detail nametoid_cache = {
363 .hash_table = nametoid_table, 375 .hash_table = nametoid_table,
364 .name = "nfs4.nametoid", 376 .name = "nfs4.nametoid",
365 .cache_put = ent_put, 377 .cache_put = ent_put,
366 .cache_request = nametoid_request, 378 .cache_upcall = nametoid_upcall,
367 .cache_parse = nametoid_parse, 379 .cache_parse = nametoid_parse,
368 .cache_show = nametoid_show, 380 .cache_show = nametoid_show,
369 .warn_no_listener = warn_no_idmapd, 381 .warn_no_listener = warn_no_idmapd,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 6d0847562d87..7e906c5b7671 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -37,6 +37,7 @@
37#include <linux/nfsd/xdr.h> 37#include <linux/nfsd/xdr.h>
38#include <linux/nfsd/syscall.h> 38#include <linux/nfsd/syscall.h>
39#include <linux/lockd/lockd.h> 39#include <linux/lockd/lockd.h>
40#include <linux/sunrpc/clnt.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include <net/ipv6.h> 43#include <net/ipv6.h>
@@ -490,22 +491,18 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
490 * 491 *
491 * Input: 492 * Input:
492 * buf: '\n'-terminated C string containing a 493 * buf: '\n'-terminated C string containing a
493 * presentation format IPv4 address 494 * presentation format IP address
494 * size: length of C string in @buf 495 * size: length of C string in @buf
495 * Output: 496 * Output:
496 * On success: returns zero if all specified locks were released; 497 * On success: returns zero if all specified locks were released;
497 * returns one if one or more locks were not released 498 * returns one if one or more locks were not released
498 * On error: return code is negative errno value 499 * On error: return code is negative errno value
499 *
500 * Note: Only AF_INET client addresses are passed in
501 */ 500 */
502static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) 501static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
503{ 502{
504 struct sockaddr_in sin = { 503 struct sockaddr_storage address;
505 .sin_family = AF_INET, 504 struct sockaddr *sap = (struct sockaddr *)&address;
506 }; 505 size_t salen = sizeof(address);
507 int b1, b2, b3, b4;
508 char c;
509 char *fo_path; 506 char *fo_path;
510 507
511 /* sanity check */ 508 /* sanity check */
@@ -519,14 +516,10 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size)
519 if (qword_get(&buf, fo_path, size) < 0) 516 if (qword_get(&buf, fo_path, size) < 0)
520 return -EINVAL; 517 return -EINVAL;
521 518
522 /* get ipv4 address */ 519 if (rpc_pton(fo_path, size, sap, salen) == 0)
523 if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
524 return -EINVAL;
525 if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
526 return -EINVAL; 520 return -EINVAL;
527 sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
528 521
529 return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin); 522 return nlmsvc_unlock_all_by_ip(sap);
530} 523}
531 524
532/** 525/**
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 492c79b7800b..24d58adfe5fd 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -496,7 +496,9 @@ nfsd(void *vrqstp)
496 /* Lock the export hash tables for reading. */ 496 /* Lock the export hash tables for reading. */
497 exp_readlock(); 497 exp_readlock();
498 498
499 validate_process_creds();
499 svc_process(rqstp); 500 svc_process(rqstp);
501 validate_process_creds();
500 502
501 /* Unlock export hash tables */ 503 /* Unlock export hash tables */
502 exp_readunlock(); 504 exp_readunlock();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 23341c1063bc..8fa09bfbcba7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -684,6 +684,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
684 __be32 err; 684 __be32 err;
685 int host_err; 685 int host_err;
686 686
687 validate_process_creds();
688
687 /* 689 /*
688 * If we get here, then the client has already done an "open", 690 * If we get here, then the client has already done an "open",
689 * and (hopefully) checked permission - so allow OWNER_OVERRIDE 691 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
@@ -740,6 +742,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
740out_nfserr: 742out_nfserr:
741 err = nfserrno(host_err); 743 err = nfserrno(host_err);
742out: 744out:
745 validate_process_creds();
743 return err; 746 return err;
744} 747}
745 748
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 7e0b61be212e..c668bca579c1 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -209,6 +209,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
209 * We cannot call radix_tree_preload for the kernels older 209 * We cannot call radix_tree_preload for the kernels older
210 * than 2.6.23, because it is not exported for modules. 210 * than 2.6.23, because it is not exported for modules.
211 */ 211 */
212retry:
212 err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); 213 err = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
213 if (err) 214 if (err)
214 goto failed_unlock; 215 goto failed_unlock;
@@ -219,7 +220,6 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
219 (unsigned long long)oldkey, 220 (unsigned long long)oldkey,
220 (unsigned long long)newkey); 221 (unsigned long long)newkey);
221 222
222retry:
223 spin_lock_irq(&btnc->tree_lock); 223 spin_lock_irq(&btnc->tree_lock);
224 err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page); 224 err = radix_tree_insert(&btnc->page_tree, newkey, obh->b_page);
225 spin_unlock_irq(&btnc->tree_lock); 225 spin_unlock_irq(&btnc->tree_lock);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b401654011a2..8a1e61545f41 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1747,8 +1747,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1747 * we know zeros will only be needed in the first and/or last cluster. 1747 * we know zeros will only be needed in the first and/or last cluster.
1748 */ 1748 */
1749 if (clusters_to_alloc || extents_to_split || 1749 if (clusters_to_alloc || extents_to_split ||
1750 wc->w_desc[0].c_needs_zero || 1750 (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
1751 wc->w_desc[wc->w_clen - 1].c_needs_zero) 1751 wc->w_desc[wc->w_clen - 1].c_needs_zero)))
1752 cluster_of_pages = 1; 1752 cluster_of_pages = 1;
1753 else 1753 else
1754 cluster_of_pages = 0; 1754 cluster_of_pages = 0;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index 2f28b7de2c8d..b4957c7d9fe2 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -85,6 +85,17 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry,
85 goto bail; 85 goto bail;
86 } 86 }
87 87
88 /*
89 * If the last lookup failed to create dentry lock, let us
90 * redo it.
91 */
92 if (!dentry->d_fsdata) {
93 mlog(0, "Inode %llu doesn't have dentry lock, "
94 "returning false\n",
95 (unsigned long long)OCFS2_I(inode)->ip_blkno);
96 goto bail;
97 }
98
88 ret = 1; 99 ret = 1;
89 100
90bail: 101bail:
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 1c9efb406a96..02bf17808bdc 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -325,6 +325,7 @@ clear_fields:
325} 325}
326 326
327static struct backing_dev_info dlmfs_backing_dev_info = { 327static struct backing_dev_info dlmfs_backing_dev_info = {
328 .name = "ocfs2-dlmfs",
328 .ra_pages = 0, /* No readahead */ 329 .ra_pages = 0, /* No readahead */
329 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 330 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
330}; 331};
diff --git a/fs/open.c b/fs/open.c
index dd98e8076024..31191bf513e4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -199,7 +199,7 @@ out:
199int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, 199int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
200 struct file *filp) 200 struct file *filp)
201{ 201{
202 int err; 202 int ret;
203 struct iattr newattrs; 203 struct iattr newattrs;
204 204
205 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ 205 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
@@ -214,12 +214,14 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
214 } 214 }
215 215
216 /* Remove suid/sgid on truncate too */ 216 /* Remove suid/sgid on truncate too */
217 newattrs.ia_valid |= should_remove_suid(dentry); 217 ret = should_remove_suid(dentry);
218 if (ret)
219 newattrs.ia_valid |= ret | ATTR_FORCE;
218 220
219 mutex_lock(&dentry->d_inode->i_mutex); 221 mutex_lock(&dentry->d_inode->i_mutex);
220 err = notify_change(dentry, &newattrs); 222 ret = notify_change(dentry, &newattrs);
221 mutex_unlock(&dentry->d_inode->i_mutex); 223 mutex_unlock(&dentry->d_inode->i_mutex);
222 return err; 224 return ret;
223} 225}
224 226
225static long do_sys_truncate(const char __user *pathname, loff_t length) 227static long do_sys_truncate(const char __user *pathname, loff_t length)
@@ -957,6 +959,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
957 int error; 959 int error;
958 struct file *f; 960 struct file *f;
959 961
962 validate_creds(cred);
963
960 /* 964 /*
961 * We must always pass in a valid mount pointer. Historically 965 * We must always pass in a valid mount pointer. Historically
962 * callers got away with not passing it, but we must enforce this at 966 * callers got away with not passing it, but we must enforce this at
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 0ff7566c767c..a7f0110fca4c 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -46,6 +46,7 @@ static const struct super_operations ramfs_ops;
46static const struct inode_operations ramfs_dir_inode_operations; 46static const struct inode_operations ramfs_dir_inode_operations;
47 47
48static struct backing_dev_info ramfs_backing_dev_info = { 48static struct backing_dev_info ramfs_backing_dev_info = {
49 .name = "ramfs",
49 .ra_pages = 0, /* No readahead */ 50 .ra_pages = 0, /* No readahead */
50 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | 51 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
51 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY | 52 BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
diff --git a/fs/super.c b/fs/super.c
index 2761d3e22ed9..9cda337ddae2 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -62,9 +62,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
62 s = NULL; 62 s = NULL;
63 goto out; 63 goto out;
64 } 64 }
65 INIT_LIST_HEAD(&s->s_dirty);
66 INIT_LIST_HEAD(&s->s_io);
67 INIT_LIST_HEAD(&s->s_more_io);
68 INIT_LIST_HEAD(&s->s_files); 65 INIT_LIST_HEAD(&s->s_files);
69 INIT_LIST_HEAD(&s->s_instances); 66 INIT_LIST_HEAD(&s->s_instances);
70 INIT_HLIST_HEAD(&s->s_anon); 67 INIT_HLIST_HEAD(&s->s_anon);
@@ -171,7 +168,7 @@ int __put_super_and_need_restart(struct super_block *sb)
171 * Drops a temporary reference, frees superblock if there's no 168 * Drops a temporary reference, frees superblock if there's no
172 * references left. 169 * references left.
173 */ 170 */
174static void put_super(struct super_block *sb) 171void put_super(struct super_block *sb)
175{ 172{
176 spin_lock(&sb_lock); 173 spin_lock(&sb_lock);
177 __put_super(sb); 174 __put_super(sb);
diff --git a/fs/sync.c b/fs/sync.c
index 3422ba61d86d..103cc7fdd3df 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -19,20 +19,22 @@
19 SYNC_FILE_RANGE_WAIT_AFTER) 19 SYNC_FILE_RANGE_WAIT_AFTER)
20 20
21/* 21/*
22 * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0) 22 * Do the filesystem syncing work. For simple filesystems
23 * just dirties buffers with inodes so we have to submit IO for these buffers 23 * writeback_inodes_sb(sb) just dirties buffers with inodes so we have to
24 * via __sync_blockdev(). This also speeds up the wait == 1 case since in that 24 * submit IO for these buffers via __sync_blockdev(). This also speeds up the
25 * case write_inode() functions do sync_dirty_buffer() and thus effectively 25 * wait == 1 case since in that case write_inode() functions do
26 * write one block at a time. 26 * sync_dirty_buffer() and thus effectively write one block at a time.
27 */ 27 */
28static int __sync_filesystem(struct super_block *sb, int wait) 28static int __sync_filesystem(struct super_block *sb, int wait)
29{ 29{
30 /* Avoid doing twice syncing and cache pruning for quota sync */ 30 /* Avoid doing twice syncing and cache pruning for quota sync */
31 if (!wait) 31 if (!wait) {
32 writeout_quota_sb(sb, -1); 32 writeout_quota_sb(sb, -1);
33 else 33 writeback_inodes_sb(sb);
34 } else {
34 sync_quota_sb(sb, -1); 35 sync_quota_sb(sb, -1);
35 sync_inodes_sb(sb, wait); 36 sync_inodes_sb(sb);
37 }
36 if (sb->s_op->sync_fs) 38 if (sb->s_op->sync_fs)
37 sb->s_op->sync_fs(sb, wait); 39 sb->s_op->sync_fs(sb, wait);
38 return __sync_blockdev(sb->s_bdev, wait); 40 return __sync_blockdev(sb->s_bdev, wait);
@@ -118,7 +120,7 @@ restart:
118 */ 120 */
119SYSCALL_DEFINE0(sync) 121SYSCALL_DEFINE0(sync)
120{ 122{
121 wakeup_pdflush(0); 123 wakeup_flusher_threads(0);
122 sync_filesystems(0); 124 sync_filesystems(0);
123 sync_filesystems(1); 125 sync_filesystems(1);
124 if (unlikely(laptop_mode)) 126 if (unlikely(laptop_mode))
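Note on the __sync_filesystem() hunk above: the non-waiting pass now uses writeback_inodes_sb() to start per-superblock writeback without blocking, while the waiting pass uses sync_inodes_sb() to wait for it to finish. A rough user-space analogue of that non-waiting/waiting split, using sync_file_range() on a single file (illustrative only; the kernel paths act on whole superblocks, not file descriptors):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* wait == 0: start I/O and return; wait == 1: also wait for completion. */
static int sync_fd(int fd, int wait)
{
        if (!wait)
                return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WRITE);
        return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
                                         SYNC_FILE_RANGE_WRITE |
                                         SYNC_FILE_RANGE_WAIT_AFTER);
}

int main(void)
{
        int fd = open("/tmp/sync-demo", O_CREAT | O_WRONLY, 0600);

        if (fd < 0)
                return 1;
        (void)write(fd, "data\n", 5);
        sync_fd(fd, 0);         /* kick writeback, do not wait */
        sync_fd(fd, 1);         /* wait for it to complete */
        close(fd);
        return 0;
}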
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 14f2d71ea3ce..0050fc40e8c9 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -760,6 +760,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
760const struct inode_operations sysfs_dir_inode_operations = { 760const struct inode_operations sysfs_dir_inode_operations = {
761 .lookup = sysfs_lookup, 761 .lookup = sysfs_lookup,
762 .setattr = sysfs_setattr, 762 .setattr = sysfs_setattr,
763 .setxattr = sysfs_setxattr,
763}; 764};
764 765
765static void remove_dir(struct sysfs_dirent *sd) 766static void remove_dir(struct sysfs_dirent *sd)
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 555f0ff988df..e28cecf179f5 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -18,6 +18,8 @@
18#include <linux/capability.h> 18#include <linux/capability.h>
19#include <linux/errno.h> 19#include <linux/errno.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/xattr.h>
22#include <linux/security.h>
21#include "sysfs.h" 23#include "sysfs.h"
22 24
23extern struct super_block * sysfs_sb; 25extern struct super_block * sysfs_sb;
@@ -29,12 +31,14 @@ static const struct address_space_operations sysfs_aops = {
29}; 31};
30 32
31static struct backing_dev_info sysfs_backing_dev_info = { 33static struct backing_dev_info sysfs_backing_dev_info = {
34 .name = "sysfs",
32 .ra_pages = 0, /* No readahead */ 35 .ra_pages = 0, /* No readahead */
33 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK, 36 .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
34}; 37};
35 38
36static const struct inode_operations sysfs_inode_operations ={ 39static const struct inode_operations sysfs_inode_operations ={
37 .setattr = sysfs_setattr, 40 .setattr = sysfs_setattr,
41 .setxattr = sysfs_setxattr,
38}; 42};
39 43
40int __init sysfs_inode_init(void) 44int __init sysfs_inode_init(void)
@@ -42,18 +46,37 @@ int __init sysfs_inode_init(void)
42 return bdi_init(&sysfs_backing_dev_info); 46 return bdi_init(&sysfs_backing_dev_info);
43} 47}
44 48
49struct sysfs_inode_attrs *sysfs_init_inode_attrs(struct sysfs_dirent *sd)
50{
51 struct sysfs_inode_attrs *attrs;
52 struct iattr *iattrs;
53
54 attrs = kzalloc(sizeof(struct sysfs_inode_attrs), GFP_KERNEL);
55 if (!attrs)
56 return NULL;
57 iattrs = &attrs->ia_iattr;
58
59 /* assign default attributes */
60 iattrs->ia_mode = sd->s_mode;
61 iattrs->ia_uid = 0;
62 iattrs->ia_gid = 0;
63 iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
64
65 return attrs;
66}
45int sysfs_setattr(struct dentry * dentry, struct iattr * iattr) 67int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
46{ 68{
47 struct inode * inode = dentry->d_inode; 69 struct inode * inode = dentry->d_inode;
48 struct sysfs_dirent * sd = dentry->d_fsdata; 70 struct sysfs_dirent * sd = dentry->d_fsdata;
49 struct iattr * sd_iattr; 71 struct sysfs_inode_attrs *sd_attrs;
72 struct iattr *iattrs;
50 unsigned int ia_valid = iattr->ia_valid; 73 unsigned int ia_valid = iattr->ia_valid;
51 int error; 74 int error;
52 75
53 if (!sd) 76 if (!sd)
54 return -EINVAL; 77 return -EINVAL;
55 78
56 sd_iattr = sd->s_iattr; 79 sd_attrs = sd->s_iattr;
57 80
58 error = inode_change_ok(inode, iattr); 81 error = inode_change_ok(inode, iattr);
59 if (error) 82 if (error)
@@ -65,42 +88,77 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
65 if (error) 88 if (error)
66 return error; 89 return error;
67 90
68 if (!sd_iattr) { 91 if (!sd_attrs) {
69 /* setting attributes for the first time, allocate now */ 92 /* setting attributes for the first time, allocate now */
70 sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); 93 sd_attrs = sysfs_init_inode_attrs(sd);
71 if (!sd_iattr) 94 if (!sd_attrs)
72 return -ENOMEM; 95 return -ENOMEM;
73 /* assign default attributes */ 96 sd->s_iattr = sd_attrs;
74 sd_iattr->ia_mode = sd->s_mode; 97 } else {
75 sd_iattr->ia_uid = 0; 98 /* attributes were changed at least once in past */
76 sd_iattr->ia_gid = 0; 99 iattrs = &sd_attrs->ia_iattr;
77 sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME; 100
78 sd->s_iattr = sd_iattr; 101 if (ia_valid & ATTR_UID)
102 iattrs->ia_uid = iattr->ia_uid;
103 if (ia_valid & ATTR_GID)
104 iattrs->ia_gid = iattr->ia_gid;
105 if (ia_valid & ATTR_ATIME)
106 iattrs->ia_atime = timespec_trunc(iattr->ia_atime,
107 inode->i_sb->s_time_gran);
108 if (ia_valid & ATTR_MTIME)
109 iattrs->ia_mtime = timespec_trunc(iattr->ia_mtime,
110 inode->i_sb->s_time_gran);
111 if (ia_valid & ATTR_CTIME)
112 iattrs->ia_ctime = timespec_trunc(iattr->ia_ctime,
113 inode->i_sb->s_time_gran);
114 if (ia_valid & ATTR_MODE) {
115 umode_t mode = iattr->ia_mode;
116
117 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
118 mode &= ~S_ISGID;
119 iattrs->ia_mode = sd->s_mode = mode;
120 }
79 } 121 }
122 return error;
123}
80 124
81 /* attributes were changed atleast once in past */ 125int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
82 126 size_t size, int flags)
83 if (ia_valid & ATTR_UID) 127{
84 sd_iattr->ia_uid = iattr->ia_uid; 128 struct sysfs_dirent *sd = dentry->d_fsdata;
85 if (ia_valid & ATTR_GID) 129 struct sysfs_inode_attrs *iattrs;
86 sd_iattr->ia_gid = iattr->ia_gid; 130 void *secdata;
87 if (ia_valid & ATTR_ATIME) 131 int error;
88 sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime, 132 u32 secdata_len = 0;
89 inode->i_sb->s_time_gran); 133
90 if (ia_valid & ATTR_MTIME) 134 if (!sd)
91 sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime, 135 return -EINVAL;
92 inode->i_sb->s_time_gran); 136 if (!sd->s_iattr)
93 if (ia_valid & ATTR_CTIME) 137 sd->s_iattr = sysfs_init_inode_attrs(sd);
94 sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime, 138 if (!sd->s_iattr)
95 inode->i_sb->s_time_gran); 139 return -ENOMEM;
96 if (ia_valid & ATTR_MODE) { 140
97 umode_t mode = iattr->ia_mode; 141 iattrs = sd->s_iattr;
98 142
99 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) 143 if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)) {
100 mode &= ~S_ISGID; 144 const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
101 sd_iattr->ia_mode = sd->s_mode = mode; 145 error = security_inode_setsecurity(dentry->d_inode, suffix,
102 } 146 value, size, flags);
147 if (error)
148 goto out;
149 error = security_inode_getsecctx(dentry->d_inode,
150 &secdata, &secdata_len);
151 if (error)
152 goto out;
153 if (iattrs->ia_secdata)
154 security_release_secctx(iattrs->ia_secdata,
155 iattrs->ia_secdata_len);
156 iattrs->ia_secdata = secdata;
157 iattrs->ia_secdata_len = secdata_len;
103 158
159 } else
160 return -EINVAL;
161out:
104 return error; 162 return error;
105} 163}
106 164
@@ -146,6 +204,7 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd)
146static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) 204static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
147{ 205{
148 struct bin_attribute *bin_attr; 206 struct bin_attribute *bin_attr;
207 struct sysfs_inode_attrs *iattrs;
149 208
150 inode->i_private = sysfs_get(sd); 209 inode->i_private = sysfs_get(sd);
151 inode->i_mapping->a_ops = &sysfs_aops; 210 inode->i_mapping->a_ops = &sysfs_aops;
@@ -154,16 +213,20 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
154 inode->i_ino = sd->s_ino; 213 inode->i_ino = sd->s_ino;
155 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key); 214 lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
156 215
157 if (sd->s_iattr) { 216 iattrs = sd->s_iattr;
217 if (iattrs) {
158 /* sysfs_dirent has non-default attributes 218 /* sysfs_dirent has non-default attributes
159 * get them for the new inode from persistent copy 219 * get them for the new inode from persistent copy
160 * in sysfs_dirent 220 * in sysfs_dirent
161 */ 221 */
162 set_inode_attr(inode, sd->s_iattr); 222 set_inode_attr(inode, &iattrs->ia_iattr);
223 if (iattrs->ia_secdata)
224 security_inode_notifysecctx(inode,
225 iattrs->ia_secdata,
226 iattrs->ia_secdata_len);
163 } else 227 } else
164 set_default_inode_attr(inode, sd->s_mode); 228 set_default_inode_attr(inode, sd->s_mode);
165 229
166
167 /* initialize inode according to type */ 230 /* initialize inode according to type */
168 switch (sysfs_type(sd)) { 231 switch (sysfs_type(sd)) {
169 case SYSFS_DIR: 232 case SYSFS_DIR:
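Note on the sysfs_setxattr() hunk above: only names under the "security." prefix are accepted; the suffix goes to the LSM via security_inode_setsecurity(), and the resulting context blob is cached in sysfs_inode_attrs so sysfs_init_inode() can replay it with security_inode_notifysecctx(). The prefix dispatch itself is plain string handling; a standalone sketch (prefix constant matching the include/linux/xattr.h definition):

#include <stdio.h>
#include <string.h>

#define XATTR_SECURITY_PREFIX     "security."
#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)

/*
 * Return the suffix of a "security.*" xattr name, or NULL if the
 * name is in some other namespace (which sysfs rejects with -EINVAL).
 */
static const char *security_xattr_suffix(const char *name)
{
        if (strncmp(name, XATTR_SECURITY_PREFIX,
                    XATTR_SECURITY_PREFIX_LEN) != 0)
                return NULL;
        return name + XATTR_SECURITY_PREFIX_LEN;
}

int main(void)
{
        printf("%s\n", security_xattr_suffix("security.selinux"));     /* selinux */
        printf("%p\n", (void *)security_xattr_suffix("user.comment")); /* (nil) */
        return 0;
}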
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 1d897ad808e0..c5081ad77026 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -16,6 +16,7 @@
16#include <linux/kobject.h> 16#include <linux/kobject.h>
17#include <linux/namei.h> 17#include <linux/namei.h>
18#include <linux/mutex.h> 18#include <linux/mutex.h>
19#include <linux/security.h>
19 20
20#include "sysfs.h" 21#include "sysfs.h"
21 22
@@ -209,6 +210,7 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co
209} 210}
210 211
211const struct inode_operations sysfs_symlink_inode_operations = { 212const struct inode_operations sysfs_symlink_inode_operations = {
213 .setxattr = sysfs_setxattr,
212 .readlink = generic_readlink, 214 .readlink = generic_readlink,
213 .follow_link = sysfs_follow_link, 215 .follow_link = sysfs_follow_link,
214 .put_link = sysfs_put_link, 216 .put_link = sysfs_put_link,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 3fa0d98481e2..af4c4e7482ac 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,8 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/fs.h>
12
11struct sysfs_open_dirent; 13struct sysfs_open_dirent;
12 14
13/* type-specific structures for sysfs_dirent->s_* union members */ 15/* type-specific structures for sysfs_dirent->s_* union members */
@@ -31,6 +33,12 @@ struct sysfs_elem_bin_attr {
31 struct hlist_head buffers; 33 struct hlist_head buffers;
32}; 34};
33 35
36struct sysfs_inode_attrs {
37 struct iattr ia_iattr;
38 void *ia_secdata;
39 u32 ia_secdata_len;
40};
41
34/* 42/*
35 * sysfs_dirent - the building block of sysfs hierarchy. Each and 43 * sysfs_dirent - the building block of sysfs hierarchy. Each and
36 * every sysfs node is represented by single sysfs_dirent. 44 * every sysfs node is represented by single sysfs_dirent.
@@ -56,7 +64,7 @@ struct sysfs_dirent {
56 unsigned int s_flags; 64 unsigned int s_flags;
57 ino_t s_ino; 65 ino_t s_ino;
58 umode_t s_mode; 66 umode_t s_mode;
59 struct iattr *s_iattr; 67 struct sysfs_inode_attrs *s_iattr;
60}; 68};
61 69
62#define SD_DEACTIVATED_BIAS INT_MIN 70#define SD_DEACTIVATED_BIAS INT_MIN
@@ -148,6 +156,8 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
148struct inode *sysfs_get_inode(struct sysfs_dirent *sd); 156struct inode *sysfs_get_inode(struct sysfs_dirent *sd);
149void sysfs_delete_inode(struct inode *inode); 157void sysfs_delete_inode(struct inode *inode);
150int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 158int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
159int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
160 size_t size, int flags);
151int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name); 161int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
152int sysfs_inode_init(void); 162int sysfs_inode_init(void);
153 163
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index eaf6d891d46f..1c8991b0db13 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -65,26 +65,14 @@
65static int shrink_liability(struct ubifs_info *c, int nr_to_write) 65static int shrink_liability(struct ubifs_info *c, int nr_to_write)
66{ 66{
67 int nr_written; 67 int nr_written;
68 struct writeback_control wbc = {
69 .sync_mode = WB_SYNC_NONE,
70 .range_end = LLONG_MAX,
71 .nr_to_write = nr_to_write,
72 };
73
74 generic_sync_sb_inodes(c->vfs_sb, &wbc);
75 nr_written = nr_to_write - wbc.nr_to_write;
76 68
69 nr_written = writeback_inodes_sb(c->vfs_sb);
77 if (!nr_written) { 70 if (!nr_written) {
78 /* 71 /*
79 * Re-try again but wait on pages/inodes which are being 72 * Re-try again but wait on pages/inodes which are being
80 * written-back concurrently (e.g., by pdflush). 73 * written-back concurrently (e.g., by pdflush).
81 */ 74 */
82 memset(&wbc, 0, sizeof(struct writeback_control)); 75 nr_written = sync_inodes_sb(c->vfs_sb);
83 wbc.sync_mode = WB_SYNC_ALL;
84 wbc.range_end = LLONG_MAX;
85 wbc.nr_to_write = nr_to_write;
86 generic_sync_sb_inodes(c->vfs_sb, &wbc);
87 nr_written = nr_to_write - wbc.nr_to_write;
88 } 76 }
89 77
90 dbg_budg("%d pages were written back", nr_written); 78 dbg_budg("%d pages were written back", nr_written);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 26d2e0d80465..51763aa8f4de 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -438,12 +438,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
438{ 438{
439 int i, err; 439 int i, err;
440 struct ubifs_info *c = sb->s_fs_info; 440 struct ubifs_info *c = sb->s_fs_info;
441 struct writeback_control wbc = {
442 .sync_mode = WB_SYNC_ALL,
443 .range_start = 0,
444 .range_end = LLONG_MAX,
445 .nr_to_write = LONG_MAX,
446 };
447 441
448 /* 442 /*
449 * Zero @wait is just an advisory thing to help the file system shove 443 * Zero @wait is just an advisory thing to help the file system shove
@@ -462,7 +456,7 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
462 * the user be able to get more accurate results of 'statfs()' after 456 * the user be able to get more accurate results of 'statfs()' after
463 * they synchronize the file system. 457 * they synchronize the file system.
464 */ 458 */
465 generic_sync_sb_inodes(sb, &wbc); 459 sync_inodes_sb(sb);
466 460
467 /* 461 /*
468 * Synchronize write buffers, because 'ubifs_run_commit()' does not 462 * Synchronize write buffers, because 'ubifs_run_commit()' does not
@@ -1971,6 +1965,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1971 * 1965 *
1972 * Read-ahead will be disabled because @c->bdi.ra_pages is 0. 1966 * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
1973 */ 1967 */
 1968	c->bdi.name = "ubifs";
1974 c->bdi.capabilities = BDI_CAP_MAP_COPY; 1969 c->bdi.capabilities = BDI_CAP_MAP_COPY;
1975 c->bdi.unplug_io_fn = default_unplug_io_fn; 1970 c->bdi.unplug_io_fn = default_unplug_io_fn;
1976 err = bdi_init(&c->bdi); 1971 err = bdi_init(&c->bdi);
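Besides the sync_inodes_sb() switch, the ubifs_fill_super() hunk fills in the new bdi.name field next to the existing capabilities. A minimal sketch of the private-bdi setup this implies, assuming the usual bdi_init()/bdi_register()/bdi_destroy() helpers; "examplefs" and example_bdi_setup() are illustrative, and error handling is trimmed to the essentials:

#include <linux/backing-dev.h>

static struct backing_dev_info example_bdi = {
	.name		= "examplefs",		/* identifies this bdi to the writeback code */
	.capabilities	= BDI_CAP_MAP_COPY,	/* same capability ubifs sets above */
	.unplug_io_fn	= default_unplug_io_fn,
};

static int example_bdi_setup(void)
{
	int err;

	err = bdi_init(&example_bdi);
	if (err)
		return err;

	err = bdi_register(&example_bdi, NULL, "examplefs");
	if (err)
		bdi_destroy(&example_bdi);
	return err;
}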
diff --git a/fs/xattr.c b/fs/xattr.c
index 1c3d0af59ddf..6d4f6d3449fb 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -66,22 +66,28 @@ xattr_permission(struct inode *inode, const char *name, int mask)
66 return inode_permission(inode, mask); 66 return inode_permission(inode, mask);
67} 67}
68 68
69int 69/**
70vfs_setxattr(struct dentry *dentry, const char *name, const void *value, 70 * __vfs_setxattr_noperm - perform setxattr operation without performing
71 size_t size, int flags) 71 * permission checks.
72 *
 73	 * @dentry: object to perform setxattr on
 74	 * @name: xattr name to set
 75	 * @value: value to set @name to
 76	 * @size: size of @value
 77	 * @flags: flags to pass into filesystem operations
78 *
79 * returns the result of the internal setxattr or setsecurity operations.
80 *
81 * This function requires the caller to lock the inode's i_mutex before it
82 * is executed. It also assumes that the caller will make the appropriate
83 * permission checks.
84 */
85int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
86 const void *value, size_t size, int flags)
72{ 87{
73 struct inode *inode = dentry->d_inode; 88 struct inode *inode = dentry->d_inode;
74 int error; 89 int error = -EOPNOTSUPP;
75
76 error = xattr_permission(inode, name, MAY_WRITE);
77 if (error)
78 return error;
79 90
80 mutex_lock(&inode->i_mutex);
81 error = security_inode_setxattr(dentry, name, value, size, flags);
82 if (error)
83 goto out;
84 error = -EOPNOTSUPP;
85 if (inode->i_op->setxattr) { 91 if (inode->i_op->setxattr) {
86 error = inode->i_op->setxattr(dentry, name, value, size, flags); 92 error = inode->i_op->setxattr(dentry, name, value, size, flags);
87 if (!error) { 93 if (!error) {
@@ -97,6 +103,29 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
97 if (!error) 103 if (!error)
98 fsnotify_xattr(dentry); 104 fsnotify_xattr(dentry);
99 } 105 }
106
107 return error;
108}
109
110
111int
112vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
113 size_t size, int flags)
114{
115 struct inode *inode = dentry->d_inode;
116 int error;
117
118 error = xattr_permission(inode, name, MAY_WRITE);
119 if (error)
120 return error;
121
122 mutex_lock(&inode->i_mutex);
123 error = security_inode_setxattr(dentry, name, value, size, flags);
124 if (error)
125 goto out;
126
127 error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
128
100out: 129out:
101 mutex_unlock(&inode->i_mutex); 130 mutex_unlock(&inode->i_mutex);
102 return error; 131 return error;
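The factoring above leaves vfs_setxattr()'s behaviour unchanged while exposing __vfs_setxattr_noperm() for callers that have already done their own permission checking. A minimal usage sketch under the documented locking rule (the caller takes i_mutex); example_set_ctx() and the attribute name are hypothetical:

#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/xattr.h>

/* Caller performs its own permission checks and holds i_mutex, as required
 * by the kernel-doc above. */
static int example_set_ctx(struct dentry *dentry, const void *ctx, size_t len)
{
	struct inode *inode = dentry->d_inode;
	int error;

	mutex_lock(&inode->i_mutex);
	error = __vfs_setxattr_noperm(dentry, "security.example", ctx, len, 0);
	mutex_unlock(&inode->i_mutex);

	return error;
}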
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 0882d166239a..eafcc7c18706 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -619,7 +619,7 @@ xfs_file_compat_ioctl(
619 case XFS_IOC_GETVERSION_32: 619 case XFS_IOC_GETVERSION_32:
620 cmd = _NATIVE_IOC(cmd, long); 620 cmd = _NATIVE_IOC(cmd, long);
621 return xfs_file_ioctl(filp, cmd, p); 621 return xfs_file_ioctl(filp, cmd, p);
622 case XFS_IOC_SWAPEXT: { 622 case XFS_IOC_SWAPEXT_32: {
623 struct xfs_swapext sxp; 623 struct xfs_swapext sxp;
624 struct compat_xfs_swapext __user *sxu = arg; 624 struct compat_xfs_swapext __user *sxu = arg;
625 625
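The small fix above matters because, assuming the XFS numbers follow the usual pattern of building both with _IOWR() over the native and compat structure layouts, sizeof() is folded into the command value: XFS_IOC_SWAPEXT and XFS_IOC_SWAPEXT_32 are therefore different integers, and a 32-bit caller's request would never have matched the old case label. A made-up illustration of how the two numbers diverge (all names here are hypothetical):

#include <linux/ioctl.h>
#include <linux/types.h>

/* Native layout: the __s64 forces 8-byte alignment on x86_64, so the
 * structure pads out to 16 bytes. */
struct example_swap {
	__s64	fd;
	__s32	flags;
};

/* i386 layout: 4-byte alignment, reproduced here with packing; 12 bytes. */
struct compat_example_swap {
	__s64	fd;
	__s32	flags;
} __attribute__((packed));

/* Different sizeof() means different command numbers. */
#define EXAMPLE_IOC_SWAP	_IOWR('E', 1, struct example_swap)
#define EXAMPLE_IOC_SWAP_32	_IOWR('E', 1, struct compat_example_swap)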
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 8070b34cc287..6c32f1d63d8c 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -485,14 +485,6 @@ xfs_vn_put_link(
485} 485}
486 486
487STATIC int 487STATIC int
488xfs_vn_permission(
489 struct inode *inode,
490 int mask)
491{
492 return generic_permission(inode, mask, xfs_check_acl);
493}
494
495STATIC int
496xfs_vn_getattr( 488xfs_vn_getattr(
497 struct vfsmount *mnt, 489 struct vfsmount *mnt,
498 struct dentry *dentry, 490 struct dentry *dentry,
@@ -696,7 +688,7 @@ xfs_vn_fiemap(
696} 688}
697 689
698static const struct inode_operations xfs_inode_operations = { 690static const struct inode_operations xfs_inode_operations = {
699 .permission = xfs_vn_permission, 691 .check_acl = xfs_check_acl,
700 .truncate = xfs_vn_truncate, 692 .truncate = xfs_vn_truncate,
701 .getattr = xfs_vn_getattr, 693 .getattr = xfs_vn_getattr,
702 .setattr = xfs_vn_setattr, 694 .setattr = xfs_vn_setattr,
@@ -724,7 +716,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
724 .rmdir = xfs_vn_unlink, 716 .rmdir = xfs_vn_unlink,
725 .mknod = xfs_vn_mknod, 717 .mknod = xfs_vn_mknod,
726 .rename = xfs_vn_rename, 718 .rename = xfs_vn_rename,
727 .permission = xfs_vn_permission, 719 .check_acl = xfs_check_acl,
728 .getattr = xfs_vn_getattr, 720 .getattr = xfs_vn_getattr,
729 .setattr = xfs_vn_setattr, 721 .setattr = xfs_vn_setattr,
730 .setxattr = generic_setxattr, 722 .setxattr = generic_setxattr,
@@ -749,7 +741,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
749 .rmdir = xfs_vn_unlink, 741 .rmdir = xfs_vn_unlink,
750 .mknod = xfs_vn_mknod, 742 .mknod = xfs_vn_mknod,
751 .rename = xfs_vn_rename, 743 .rename = xfs_vn_rename,
752 .permission = xfs_vn_permission, 744 .check_acl = xfs_check_acl,
753 .getattr = xfs_vn_getattr, 745 .getattr = xfs_vn_getattr,
754 .setattr = xfs_vn_setattr, 746 .setattr = xfs_vn_setattr,
755 .setxattr = generic_setxattr, 747 .setxattr = generic_setxattr,
@@ -762,7 +754,7 @@ static const struct inode_operations xfs_symlink_inode_operations = {
762 .readlink = generic_readlink, 754 .readlink = generic_readlink,
763 .follow_link = xfs_vn_follow_link, 755 .follow_link = xfs_vn_follow_link,
764 .put_link = xfs_vn_put_link, 756 .put_link = xfs_vn_put_link,
765 .permission = xfs_vn_permission, 757 .check_acl = xfs_check_acl,
766 .getattr = xfs_vn_getattr, 758 .getattr = xfs_vn_getattr,
767 .setattr = xfs_vn_setattr, 759 .setattr = xfs_vn_setattr,
768 .setxattr = generic_setxattr, 760 .setxattr = generic_setxattr,
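Dropping xfs_vn_permission() works because the inode_operations now carry a ->check_acl hook that the generic permission code consults when no ->permission method is set. A rough sketch of that dispatch, under the assumption that generic_permission() takes the ACL callback as its third argument in this series; example_inode_permission() is illustrative, not the actual fs/namei.c code:

#include <linux/fs.h>

/* If the filesystem supplies ->permission, use it; otherwise fall back to
 * generic_permission() and let it call the per-fs ->check_acl hook. */
static int example_inode_permission(struct inode *inode, int mask)
{
	if (inode->i_op->permission)
		return inode->i_op->permission(inode, mask);

	return generic_permission(inode, mask, inode->i_op->check_acl);
}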