aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile5
-rw-r--r--fs/afs/write.c19
-rw-r--r--fs/buffer.c2
-rw-r--r--fs/ceph/addr.c9
-rw-r--r--fs/cifs/file.c10
-rw-r--r--fs/compat.c2
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/exec.c5
-rw-r--r--fs/fs-writeback.c8
-rw-r--r--fs/fuse/dev.c12
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/hostfs/hostfs_user.c5
-rw-r--r--fs/hugetlbfs/inode.c15
-rw-r--r--fs/inode.c1
-rw-r--r--fs/lockd/clntlock.c15
-rw-r--r--fs/lockd/clntproc.c13
-rw-r--r--fs/lockd/host.c1
-rw-r--r--fs/lockd/mon.c1
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/lockd/svc4proc.c2
-rw-r--r--fs/lockd/svclock.c31
-rw-r--r--fs/lockd/svcproc.c2
-rw-r--r--fs/locks.c19
-rw-r--r--fs/nfs/Kconfig19
-rw-r--r--fs/nfs/Makefile4
-rw-r--r--fs/nfs/callback.c4
-rw-r--r--fs/nfs/callback_proc.c8
-rw-r--r--fs/nfs/client.c28
-rw-r--r--fs/nfs/dir.c1013
-rw-r--r--fs/nfs/dns_resolve.c6
-rw-r--r--fs/nfs/file.c86
-rw-r--r--fs/nfs/idmap.c211
-rw-r--r--fs/nfs/inode.c39
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/mount_clnt.c4
-rw-r--r--fs/nfs/nfs2xdr.c107
-rw-r--r--fs/nfs/nfs3proc.c62
-rw-r--r--fs/nfs/nfs3xdr.c196
-rw-r--r--fs/nfs/nfs4_fs.h4
-rw-r--r--fs/nfs/nfs4filelayout.c280
-rw-r--r--fs/nfs/nfs4filelayout.h94
-rw-r--r--fs/nfs/nfs4filelayoutdev.c448
-rw-r--r--fs/nfs/nfs4proc.c497
-rw-r--r--fs/nfs/nfs4state.c42
-rw-r--r--fs/nfs/nfs4xdr.c700
-rw-r--r--fs/nfs/nfsroot.c568
-rw-r--r--fs/nfs/pnfs.c783
-rw-r--r--fs/nfs/pnfs.h189
-rw-r--r--fs/nfs/proc.c35
-rw-r--r--fs/nfs/read.c4
-rw-r--r--fs/nfs/super.c72
-rw-r--r--fs/nfs/sysctl.c2
-rw-r--r--fs/nfs/unlink.c259
-rw-r--r--fs/nfs/write.c22
-rw-r--r--fs/nfsd/Kconfig12
-rw-r--r--fs/nfsd/export.c73
-rw-r--r--fs/nfsd/nfs4callback.c245
-rw-r--r--fs/nfsd/nfs4idmap.c105
-rw-r--r--fs/nfsd/nfs4proc.c7
-rw-r--r--fs/nfsd/nfs4state.c493
-rw-r--r--fs/nfsd/nfs4xdr.c18
-rw-r--r--fs/nfsd/nfsctl.c26
-rw-r--r--fs/nfsd/nfsd.h2
-rw-r--r--fs/nfsd/nfssvc.c5
-rw-r--r--fs/nfsd/state.h52
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/proc/Kconfig4
-rw-r--r--fs/proc/base.c99
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/signalfd.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c3
71 files changed, 5148 insertions, 1991 deletions
diff --git a/fs/Makefile b/fs/Makefile
index e6ec1d309b1d..26956fcec917 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -29,10 +29,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o
29obj-$(CONFIG_AIO) += aio.o 29obj-$(CONFIG_AIO) += aio.o
30obj-$(CONFIG_FILE_LOCKING) += locks.o 30obj-$(CONFIG_FILE_LOCKING) += locks.o
31obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o 31obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
32 32obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o
33nfsd-$(CONFIG_NFSD) := nfsctl.o
34obj-y += $(nfsd-y) $(nfsd-m)
35
36obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o 33obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
37obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o 34obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
38obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o 35obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 722743b152d8..15690bb1d3b5 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -438,7 +438,6 @@ no_more:
438 */ 438 */
439int afs_writepage(struct page *page, struct writeback_control *wbc) 439int afs_writepage(struct page *page, struct writeback_control *wbc)
440{ 440{
441 struct backing_dev_info *bdi = page->mapping->backing_dev_info;
442 struct afs_writeback *wb; 441 struct afs_writeback *wb;
443 int ret; 442 int ret;
444 443
@@ -455,8 +454,6 @@ int afs_writepage(struct page *page, struct writeback_control *wbc)
455 } 454 }
456 455
457 wbc->nr_to_write -= ret; 456 wbc->nr_to_write -= ret;
458 if (wbc->nonblocking && bdi_write_congested(bdi))
459 wbc->encountered_congestion = 1;
460 457
461 _leave(" = 0"); 458 _leave(" = 0");
462 return 0; 459 return 0;
@@ -469,7 +466,6 @@ static int afs_writepages_region(struct address_space *mapping,
469 struct writeback_control *wbc, 466 struct writeback_control *wbc,
470 pgoff_t index, pgoff_t end, pgoff_t *_next) 467 pgoff_t index, pgoff_t end, pgoff_t *_next)
471{ 468{
472 struct backing_dev_info *bdi = mapping->backing_dev_info;
473 struct afs_writeback *wb; 469 struct afs_writeback *wb;
474 struct page *page; 470 struct page *page;
475 int ret, n; 471 int ret, n;
@@ -529,11 +525,6 @@ static int afs_writepages_region(struct address_space *mapping,
529 525
530 wbc->nr_to_write -= ret; 526 wbc->nr_to_write -= ret;
531 527
532 if (wbc->nonblocking && bdi_write_congested(bdi)) {
533 wbc->encountered_congestion = 1;
534 break;
535 }
536
537 cond_resched(); 528 cond_resched();
538 } while (index < end && wbc->nr_to_write > 0); 529 } while (index < end && wbc->nr_to_write > 0);
539 530
@@ -548,24 +539,16 @@ static int afs_writepages_region(struct address_space *mapping,
548int afs_writepages(struct address_space *mapping, 539int afs_writepages(struct address_space *mapping,
549 struct writeback_control *wbc) 540 struct writeback_control *wbc)
550{ 541{
551 struct backing_dev_info *bdi = mapping->backing_dev_info;
552 pgoff_t start, end, next; 542 pgoff_t start, end, next;
553 int ret; 543 int ret;
554 544
555 _enter(""); 545 _enter("");
556 546
557 if (wbc->nonblocking && bdi_write_congested(bdi)) {
558 wbc->encountered_congestion = 1;
559 _leave(" = 0 [congest]");
560 return 0;
561 }
562
563 if (wbc->range_cyclic) { 547 if (wbc->range_cyclic) {
564 start = mapping->writeback_index; 548 start = mapping->writeback_index;
565 end = -1; 549 end = -1;
566 ret = afs_writepages_region(mapping, wbc, start, end, &next); 550 ret = afs_writepages_region(mapping, wbc, start, end, &next);
567 if (start > 0 && wbc->nr_to_write > 0 && ret == 0 && 551 if (start > 0 && wbc->nr_to_write > 0 && ret == 0)
568 !(wbc->nonblocking && wbc->encountered_congestion))
569 ret = afs_writepages_region(mapping, wbc, 0, start, 552 ret = afs_writepages_region(mapping, wbc, 0, start,
570 &next); 553 &next);
571 mapping->writeback_index = next; 554 mapping->writeback_index = next;
diff --git a/fs/buffer.c b/fs/buffer.c
index d895d9fd5b71..5930e382959b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1705,7 +1705,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1705 * and kswapd activity, but those code paths have their own 1705 * and kswapd activity, but those code paths have their own
1706 * higher-level throttling. 1706 * higher-level throttling.
1707 */ 1707 */
1708 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 1708 if (wbc->sync_mode != WB_SYNC_NONE) {
1709 lock_buffer(bh); 1709 lock_buffer(bh);
1710 } else if (!trylock_buffer(bh)) { 1710 } else if (!trylock_buffer(bh)) {
1711 redirty_page_for_writepage(wbc, page); 1711 redirty_page_for_writepage(wbc, page);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 51bcc5ce3230..e9c874abc9e1 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -591,7 +591,6 @@ static int ceph_writepages_start(struct address_space *mapping,
591 struct writeback_control *wbc) 591 struct writeback_control *wbc)
592{ 592{
593 struct inode *inode = mapping->host; 593 struct inode *inode = mapping->host;
594 struct backing_dev_info *bdi = mapping->backing_dev_info;
595 struct ceph_inode_info *ci = ceph_inode(inode); 594 struct ceph_inode_info *ci = ceph_inode(inode);
596 struct ceph_fs_client *fsc; 595 struct ceph_fs_client *fsc;
597 pgoff_t index, start, end; 596 pgoff_t index, start, end;
@@ -633,13 +632,6 @@ static int ceph_writepages_start(struct address_space *mapping,
633 632
634 pagevec_init(&pvec, 0); 633 pagevec_init(&pvec, 0);
635 634
636 /* ?? */
637 if (wbc->nonblocking && bdi_write_congested(bdi)) {
638 dout(" writepages congested\n");
639 wbc->encountered_congestion = 1;
640 goto out_final;
641 }
642
643 /* where to start/end? */ 635 /* where to start/end? */
644 if (wbc->range_cyclic) { 636 if (wbc->range_cyclic) {
645 start = mapping->writeback_index; /* Start from prev offset */ 637 start = mapping->writeback_index; /* Start from prev offset */
@@ -885,7 +877,6 @@ out:
885 rc = 0; /* vfs expects us to return 0 */ 877 rc = 0; /* vfs expects us to return 0 */
886 ceph_put_snap_context(snapc); 878 ceph_put_snap_context(snapc);
887 dout("writepages done, rc = %d\n", rc); 879 dout("writepages done, rc = %d\n", rc);
888out_final:
889 return rc; 880 return rc;
890} 881}
891 882
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8c81e7b14d53..45af003865d2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1303,7 +1303,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1303static int cifs_writepages(struct address_space *mapping, 1303static int cifs_writepages(struct address_space *mapping,
1304 struct writeback_control *wbc) 1304 struct writeback_control *wbc)
1305{ 1305{
1306 struct backing_dev_info *bdi = mapping->backing_dev_info;
1307 unsigned int bytes_to_write; 1306 unsigned int bytes_to_write;
1308 unsigned int bytes_written; 1307 unsigned int bytes_written;
1309 struct cifs_sb_info *cifs_sb; 1308 struct cifs_sb_info *cifs_sb;
@@ -1326,15 +1325,6 @@ static int cifs_writepages(struct address_space *mapping,
1326 int scanned = 0; 1325 int scanned = 0;
1327 int xid, long_op; 1326 int xid, long_op;
1328 1327
1329 /*
1330 * BB: Is this meaningful for a non-block-device file system?
1331 * If it is, we should test it again after we do I/O
1332 */
1333 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1334 wbc->encountered_congestion = 1;
1335 return 0;
1336 }
1337
1338 cifs_sb = CIFS_SB(mapping->host->i_sb); 1328 cifs_sb = CIFS_SB(mapping->host->i_sb);
1339 1329
1340 /* 1330 /*
diff --git a/fs/compat.c b/fs/compat.c
index 0644a154672b..f03abdadc401 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1963,7 +1963,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1963} 1963}
1964#endif /* HAVE_SET_RESTORE_SIGMASK */ 1964#endif /* HAVE_SET_RESTORE_SIGMASK */
1965 1965
1966#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) 1966#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED)
1967/* Stuff for NFS server syscalls... */ 1967/* Stuff for NFS server syscalls... */
1968struct compat_nfsctl_svc { 1968struct compat_nfsctl_svc {
1969 u16 svc32_port; 1969 u16 svc32_port;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 48d74c7391d1..85882f6ba5f7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
218 * filesystems can use it to hold additional state between get_block calls and 218 * filesystems can use it to hold additional state between get_block calls and
219 * dio_complete. 219 * dio_complete.
220 */ 220 */
221static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async) 221static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async)
222{ 222{
223 ssize_t transferred = 0; 223 ssize_t transferred = 0;
224 224
diff --git a/fs/exec.c b/fs/exec.c
index 6d2b6f936858..3aa75b8888a1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -54,6 +54,7 @@
54#include <linux/fsnotify.h> 54#include <linux/fsnotify.h>
55#include <linux/fs_struct.h> 55#include <linux/fs_struct.h>
56#include <linux/pipe_fs_i.h> 56#include <linux/pipe_fs_i.h>
57#include <linux/oom.h>
57 58
58#include <asm/uaccess.h> 59#include <asm/uaccess.h>
59#include <asm/mmu_context.h> 60#include <asm/mmu_context.h>
@@ -759,6 +760,10 @@ static int exec_mmap(struct mm_struct *mm)
759 tsk->mm = mm; 760 tsk->mm = mm;
760 tsk->active_mm = mm; 761 tsk->active_mm = mm;
761 activate_mm(active_mm, mm); 762 activate_mm(active_mm, mm);
763 if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
764 atomic_dec(&old_mm->oom_disable_count);
765 atomic_inc(&tsk->mm->oom_disable_count);
766 }
762 task_unlock(tsk); 767 task_unlock(tsk);
763 arch_pick_mmap_layout(mm); 768 arch_pick_mmap_layout(mm);
764 if (old_mm) { 769 if (old_mm) {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f6af81add459..aed881a76b22 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -586,7 +586,7 @@ static inline bool over_bground_thresh(void)
586 global_dirty_limits(&background_thresh, &dirty_thresh); 586 global_dirty_limits(&background_thresh, &dirty_thresh);
587 587
588 return (global_page_state(NR_FILE_DIRTY) + 588 return (global_page_state(NR_FILE_DIRTY) +
589 global_page_state(NR_UNSTABLE_NFS) >= background_thresh); 589 global_page_state(NR_UNSTABLE_NFS) > background_thresh);
590} 590}
591 591
592/* 592/*
@@ -724,6 +724,10 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
724 return 0; 724 return 0;
725 725
726 wb->last_old_flush = jiffies; 726 wb->last_old_flush = jiffies;
727 /*
728 * Add in the number of potentially dirty inodes, because each inode
729 * write can dirty pagecache in the underlying blockdev.
730 */
727 nr_pages = global_page_state(NR_FILE_DIRTY) + 731 nr_pages = global_page_state(NR_FILE_DIRTY) +
728 global_page_state(NR_UNSTABLE_NFS) + 732 global_page_state(NR_UNSTABLE_NFS) +
729 get_nr_dirty_inodes(); 733 get_nr_dirty_inodes();
@@ -793,7 +797,7 @@ int bdi_writeback_thread(void *data)
793 struct backing_dev_info *bdi = wb->bdi; 797 struct backing_dev_info *bdi = wb->bdi;
794 long pages_written; 798 long pages_written;
795 799
796 current->flags |= PF_FLUSHER | PF_SWAPWRITE; 800 current->flags |= PF_SWAPWRITE;
797 set_freezable(); 801 set_freezable();
798 wb->last_active = jiffies; 802 wb->last_active = jiffies;
799 803
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cde755cca564..b98664275f02 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -809,11 +809,9 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
809 int err; 809 int err;
810 struct page *page = *pagep; 810 struct page *page = *pagep;
811 811
812 if (page && zeroing && count < PAGE_SIZE) { 812 if (page && zeroing && count < PAGE_SIZE)
813 void *mapaddr = kmap_atomic(page, KM_USER1); 813 clear_highpage(page);
814 memset(mapaddr, 0, PAGE_SIZE); 814
815 kunmap_atomic(mapaddr, KM_USER1);
816 }
817 while (count) { 815 while (count) {
818 if (cs->write && cs->pipebufs && page) { 816 if (cs->write && cs->pipebufs && page) {
819 return fuse_ref_page(cs, page, offset, count); 817 return fuse_ref_page(cs, page, offset, count);
@@ -830,10 +828,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
830 } 828 }
831 } 829 }
832 if (page) { 830 if (page) {
833 void *mapaddr = kmap_atomic(page, KM_USER1); 831 void *mapaddr = kmap_atomic(page, KM_USER0);
834 void *buf = mapaddr + offset; 832 void *buf = mapaddr + offset;
835 offset += fuse_copy_do(cs, &buf, &count); 833 offset += fuse_copy_do(cs, &buf, &count);
836 kunmap_atomic(mapaddr, KM_USER1); 834 kunmap_atomic(mapaddr, KM_USER0);
837 } else 835 } else
838 offset += fuse_copy_do(cs, NULL, &count); 836 offset += fuse_copy_do(cs, NULL, &count);
839 } 837 }
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index f3b071f921aa..939739c7b3f9 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -55,7 +55,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
55 * activity, but those code paths have their own higher-level 55 * activity, but those code paths have their own higher-level
56 * throttling. 56 * throttling.
57 */ 57 */
58 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 58 if (wbc->sync_mode != WB_SYNC_NONE) {
59 lock_buffer(bh); 59 lock_buffer(bh);
60 } else if (!trylock_buffer(bh)) { 60 } else if (!trylock_buffer(bh)) {
61 redirty_page_for_writepage(wbc, page); 61 redirty_page_for_writepage(wbc, page);
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 8d02683585e0..d51a98384bc0 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -94,8 +94,7 @@ void *open_dir(char *path, int *err_out)
94 94
95 dir = opendir(path); 95 dir = opendir(path);
96 *err_out = errno; 96 *err_out = errno;
97 if (dir == NULL) 97
98 return NULL;
99 return dir; 98 return dir;
100} 99}
101 100
@@ -205,7 +204,7 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
205 if (attrs->ia_valid & HOSTFS_ATTR_MODE) { 204 if (attrs->ia_valid & HOSTFS_ATTR_MODE) {
206 if (fd >= 0) { 205 if (fd >= 0) {
207 if (fchmod(fd, attrs->ia_mode) != 0) 206 if (fchmod(fd, attrs->ia_mode) != 0)
208 return (-errno); 207 return -errno;
209 } else if (chmod(file, attrs->ia_mode) != 0) { 208 } else if (chmod(file, attrs->ia_mode) != 0) {
210 return -errno; 209 return -errno;
211 } 210 }
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8d0607b37266..b14be3f781c7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -31,6 +31,7 @@
31#include <linux/statfs.h> 31#include <linux/statfs.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/magic.h> 33#include <linux/magic.h>
34#include <linux/migrate.h>
34 35
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36 37
@@ -574,6 +575,19 @@ static int hugetlbfs_set_page_dirty(struct page *page)
574 return 0; 575 return 0;
575} 576}
576 577
578static int hugetlbfs_migrate_page(struct address_space *mapping,
579 struct page *newpage, struct page *page)
580{
581 int rc;
582
583 rc = migrate_huge_page_move_mapping(mapping, newpage, page);
584 if (rc)
585 return rc;
586 migrate_page_copy(newpage, page);
587
588 return 0;
589}
590
577static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 591static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
578{ 592{
579 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 593 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
@@ -660,6 +674,7 @@ static const struct address_space_operations hugetlbfs_aops = {
660 .write_begin = hugetlbfs_write_begin, 674 .write_begin = hugetlbfs_write_begin,
661 .write_end = hugetlbfs_write_end, 675 .write_end = hugetlbfs_write_end,
662 .set_page_dirty = hugetlbfs_set_page_dirty, 676 .set_page_dirty = hugetlbfs_set_page_dirty,
677 .migratepage = hugetlbfs_migrate_page,
663}; 678};
664 679
665 680
diff --git a/fs/inode.c b/fs/inode.c
index a6d60682f0fd..ae2727ab0c3a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,6 +24,7 @@
24#include <linux/mount.h> 24#include <linux/mount.h>
25#include <linux/async.h> 25#include <linux/async.h>
26#include <linux/posix_acl.h> 26#include <linux/posix_acl.h>
27#include <linux/ima.h>
27 28
28/* 29/*
29 * This is needed for the following functions: 30 * This is needed for the following functions:
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 64fd427c993c..d5bb86866e6c 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -42,6 +42,7 @@ struct nlm_wait {
42}; 42};
43 43
44static LIST_HEAD(nlm_blocked); 44static LIST_HEAD(nlm_blocked);
45static DEFINE_SPINLOCK(nlm_blocked_lock);
45 46
46/** 47/**
47 * nlmclnt_init - Set up per-NFS mount point lockd data structures 48 * nlmclnt_init - Set up per-NFS mount point lockd data structures
@@ -97,7 +98,10 @@ struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *
97 block->b_lock = fl; 98 block->b_lock = fl;
98 init_waitqueue_head(&block->b_wait); 99 init_waitqueue_head(&block->b_wait);
99 block->b_status = nlm_lck_blocked; 100 block->b_status = nlm_lck_blocked;
101
102 spin_lock(&nlm_blocked_lock);
100 list_add(&block->b_list, &nlm_blocked); 103 list_add(&block->b_list, &nlm_blocked);
104 spin_unlock(&nlm_blocked_lock);
101 } 105 }
102 return block; 106 return block;
103} 107}
@@ -106,7 +110,9 @@ void nlmclnt_finish_block(struct nlm_wait *block)
106{ 110{
107 if (block == NULL) 111 if (block == NULL)
108 return; 112 return;
113 spin_lock(&nlm_blocked_lock);
109 list_del(&block->b_list); 114 list_del(&block->b_list);
115 spin_unlock(&nlm_blocked_lock);
110 kfree(block); 116 kfree(block);
111} 117}
112 118
@@ -154,6 +160,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
154 * Look up blocked request based on arguments. 160 * Look up blocked request based on arguments.
155 * Warning: must not use cookie to match it! 161 * Warning: must not use cookie to match it!
156 */ 162 */
163 spin_lock(&nlm_blocked_lock);
157 list_for_each_entry(block, &nlm_blocked, b_list) { 164 list_for_each_entry(block, &nlm_blocked, b_list) {
158 struct file_lock *fl_blocked = block->b_lock; 165 struct file_lock *fl_blocked = block->b_lock;
159 166
@@ -178,6 +185,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
178 wake_up(&block->b_wait); 185 wake_up(&block->b_wait);
179 res = nlm_granted; 186 res = nlm_granted;
180 } 187 }
188 spin_unlock(&nlm_blocked_lock);
181 return res; 189 return res;
182} 190}
183 191
@@ -216,10 +224,6 @@ reclaimer(void *ptr)
216 allow_signal(SIGKILL); 224 allow_signal(SIGKILL);
217 225
218 down_write(&host->h_rwsem); 226 down_write(&host->h_rwsem);
219
220 /* This one ensures that our parent doesn't terminate while the
221 * reclaim is in progress */
222 lock_kernel();
223 lockd_up(); /* note: this cannot fail as lockd is already running */ 227 lockd_up(); /* note: this cannot fail as lockd is already running */
224 228
225 dprintk("lockd: reclaiming locks for host %s\n", host->h_name); 229 dprintk("lockd: reclaiming locks for host %s\n", host->h_name);
@@ -260,16 +264,17 @@ restart:
260 dprintk("NLM: done reclaiming locks for host %s\n", host->h_name); 264 dprintk("NLM: done reclaiming locks for host %s\n", host->h_name);
261 265
262 /* Now, wake up all processes that sleep on a blocked lock */ 266 /* Now, wake up all processes that sleep on a blocked lock */
267 spin_lock(&nlm_blocked_lock);
263 list_for_each_entry(block, &nlm_blocked, b_list) { 268 list_for_each_entry(block, &nlm_blocked, b_list) {
264 if (block->b_host == host) { 269 if (block->b_host == host) {
265 block->b_status = nlm_lck_denied_grace_period; 270 block->b_status = nlm_lck_denied_grace_period;
266 wake_up(&block->b_wait); 271 wake_up(&block->b_wait);
267 } 272 }
268 } 273 }
274 spin_unlock(&nlm_blocked_lock);
269 275
270 /* Release host handle after use */ 276 /* Release host handle after use */
271 nlm_release_host(host); 277 nlm_release_host(host);
272 lockd_down(); 278 lockd_down();
273 unlock_kernel();
274 return 0; 279 return 0;
275} 280}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 7932c399fab4..47ea1e1925b8 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -166,7 +166,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
166 /* Set up the argument struct */ 166 /* Set up the argument struct */
167 nlmclnt_setlockargs(call, fl); 167 nlmclnt_setlockargs(call, fl);
168 168
169 lock_kernel();
170 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { 169 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
171 if (fl->fl_type != F_UNLCK) { 170 if (fl->fl_type != F_UNLCK) {
172 call->a_args.block = IS_SETLKW(cmd) ? 1 : 0; 171 call->a_args.block = IS_SETLKW(cmd) ? 1 : 0;
@@ -177,10 +176,8 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
177 status = nlmclnt_test(call, fl); 176 status = nlmclnt_test(call, fl);
178 else 177 else
179 status = -EINVAL; 178 status = -EINVAL;
180
181 fl->fl_ops->fl_release_private(fl); 179 fl->fl_ops->fl_release_private(fl);
182 fl->fl_ops = NULL; 180 fl->fl_ops = NULL;
183 unlock_kernel();
184 181
185 dprintk("lockd: clnt proc returns %d\n", status); 182 dprintk("lockd: clnt proc returns %d\n", status);
186 return status; 183 return status;
@@ -226,9 +223,7 @@ void nlm_release_call(struct nlm_rqst *call)
226 223
227static void nlmclnt_rpc_release(void *data) 224static void nlmclnt_rpc_release(void *data)
228{ 225{
229 lock_kernel();
230 nlm_release_call(data); 226 nlm_release_call(data);
231 unlock_kernel();
232} 227}
233 228
234static int nlm_wait_on_grace(wait_queue_head_t *queue) 229static int nlm_wait_on_grace(wait_queue_head_t *queue)
@@ -448,14 +443,18 @@ out:
448 443
449static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) 444static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
450{ 445{
446 spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
451 new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state; 447 new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state;
452 new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner); 448 new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner);
453 list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted); 449 list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted);
450 spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
454} 451}
455 452
456static void nlmclnt_locks_release_private(struct file_lock *fl) 453static void nlmclnt_locks_release_private(struct file_lock *fl)
457{ 454{
455 spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock);
458 list_del(&fl->fl_u.nfs_fl.list); 456 list_del(&fl->fl_u.nfs_fl.list);
457 spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock);
459 nlm_put_lockowner(fl->fl_u.nfs_fl.owner); 458 nlm_put_lockowner(fl->fl_u.nfs_fl.owner);
460} 459}
461 460
@@ -721,9 +720,7 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
721die: 720die:
722 return; 721 return;
723 retry_rebind: 722 retry_rebind:
724 lock_kernel();
725 nlm_rebind_host(req->a_host); 723 nlm_rebind_host(req->a_host);
726 unlock_kernel();
727 retry_unlock: 724 retry_unlock:
728 rpc_restart_call(task); 725 rpc_restart_call(task);
729} 726}
@@ -801,9 +798,7 @@ retry_cancel:
801 /* Don't ever retry more than 3 times */ 798 /* Don't ever retry more than 3 times */
802 if (req->a_retries++ >= NLMCLNT_MAX_RETRIES) 799 if (req->a_retries++ >= NLMCLNT_MAX_RETRIES)
803 goto die; 800 goto die;
804 lock_kernel();
805 nlm_rebind_host(req->a_host); 801 nlm_rebind_host(req->a_host);
806 unlock_kernel();
807 rpc_restart_call(task); 802 rpc_restart_call(task);
808 rpc_delay(task, 30 * HZ); 803 rpc_delay(task, 30 * HZ);
809} 804}
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index bb464d12104c..25e21e4023b2 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -353,6 +353,7 @@ nlm_bind_host(struct nlm_host *host)
353 .to_retries = 5U, 353 .to_retries = 5U,
354 }; 354 };
355 struct rpc_create_args args = { 355 struct rpc_create_args args = {
356 .net = &init_net,
356 .protocol = host->h_proto, 357 .protocol = host->h_proto,
357 .address = nlm_addr(host), 358 .address = nlm_addr(host),
358 .addrsize = host->h_addrlen, 359 .addrsize = host->h_addrlen,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index e3015464fbab..e0c918949644 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -69,6 +69,7 @@ static struct rpc_clnt *nsm_create(void)
69 .sin_addr.s_addr = htonl(INADDR_LOOPBACK), 69 .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
70 }; 70 };
71 struct rpc_create_args args = { 71 struct rpc_create_args args = {
72 .net = &init_net,
72 .protocol = XPRT_TRANSPORT_UDP, 73 .protocol = XPRT_TRANSPORT_UDP,
73 .address = (struct sockaddr *)&sin, 74 .address = (struct sockaddr *)&sin,
74 .addrsize = sizeof(sin), 75 .addrsize = sizeof(sin),
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index f1bacf1a0391..b13aabc12298 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -206,7 +206,7 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name,
206 206
207 xprt = svc_find_xprt(serv, name, family, 0); 207 xprt = svc_find_xprt(serv, name, family, 0);
208 if (xprt == NULL) 208 if (xprt == NULL)
209 return svc_create_xprt(serv, name, family, port, 209 return svc_create_xprt(serv, name, &init_net, family, port,
210 SVC_SOCK_DEFAULTS); 210 SVC_SOCK_DEFAULTS);
211 svc_xprt_put(xprt); 211 svc_xprt_put(xprt);
212 return 0; 212 return 0;
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 031c6569a134..a336e832475d 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -230,9 +230,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
230 230
231static void nlm4svc_callback_release(void *data) 231static void nlm4svc_callback_release(void *data)
232{ 232{
233 lock_kernel();
234 nlm_release_call(data); 233 nlm_release_call(data);
235 unlock_kernel();
236} 234}
237 235
238static const struct rpc_call_ops nlm4svc_callback_ops = { 236static const struct rpc_call_ops nlm4svc_callback_ops = {
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 84055d31bfc5..6f1ef000975a 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -52,12 +52,13 @@ static const struct rpc_call_ops nlmsvc_grant_ops;
52 * The list of blocked locks to retry 52 * The list of blocked locks to retry
53 */ 53 */
54static LIST_HEAD(nlm_blocked); 54static LIST_HEAD(nlm_blocked);
55static DEFINE_SPINLOCK(nlm_blocked_lock);
55 56
56/* 57/*
57 * Insert a blocked lock into the global list 58 * Insert a blocked lock into the global list
58 */ 59 */
59static void 60static void
60nlmsvc_insert_block(struct nlm_block *block, unsigned long when) 61nlmsvc_insert_block_locked(struct nlm_block *block, unsigned long when)
61{ 62{
62 struct nlm_block *b; 63 struct nlm_block *b;
63 struct list_head *pos; 64 struct list_head *pos;
@@ -87,6 +88,13 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
87 block->b_when = when; 88 block->b_when = when;
88} 89}
89 90
91static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when)
92{
93 spin_lock(&nlm_blocked_lock);
94 nlmsvc_insert_block_locked(block, when);
95 spin_unlock(&nlm_blocked_lock);
96}
97
90/* 98/*
91 * Remove a block from the global list 99 * Remove a block from the global list
92 */ 100 */
@@ -94,7 +102,9 @@ static inline void
94nlmsvc_remove_block(struct nlm_block *block) 102nlmsvc_remove_block(struct nlm_block *block)
95{ 103{
96 if (!list_empty(&block->b_list)) { 104 if (!list_empty(&block->b_list)) {
105 spin_lock(&nlm_blocked_lock);
97 list_del_init(&block->b_list); 106 list_del_init(&block->b_list);
107 spin_unlock(&nlm_blocked_lock);
98 nlmsvc_release_block(block); 108 nlmsvc_release_block(block);
99 } 109 }
100} 110}
@@ -651,7 +661,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
651 struct nlm_block *block; 661 struct nlm_block *block;
652 int rc = -ENOENT; 662 int rc = -ENOENT;
653 663
654 lock_kernel(); 664 spin_lock(&nlm_blocked_lock);
655 list_for_each_entry(block, &nlm_blocked, b_list) { 665 list_for_each_entry(block, &nlm_blocked, b_list) {
656 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { 666 if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
657 dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n", 667 dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n",
@@ -665,13 +675,13 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
665 } else if (result == 0) 675 } else if (result == 0)
666 block->b_granted = 1; 676 block->b_granted = 1;
667 677
668 nlmsvc_insert_block(block, 0); 678 nlmsvc_insert_block_locked(block, 0);
669 svc_wake_up(block->b_daemon); 679 svc_wake_up(block->b_daemon);
670 rc = 0; 680 rc = 0;
671 break; 681 break;
672 } 682 }
673 } 683 }
674 unlock_kernel(); 684 spin_unlock(&nlm_blocked_lock);
675 if (rc == -ENOENT) 685 if (rc == -ENOENT)
676 printk(KERN_WARNING "lockd: grant for unknown block\n"); 686 printk(KERN_WARNING "lockd: grant for unknown block\n");
677 return rc; 687 return rc;
@@ -803,7 +813,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
803 813
804 dprintk("lockd: GRANT_MSG RPC callback\n"); 814 dprintk("lockd: GRANT_MSG RPC callback\n");
805 815
806 lock_kernel(); 816 spin_lock(&nlm_blocked_lock);
807 /* if the block is not on a list at this point then it has 817 /* if the block is not on a list at this point then it has
808 * been invalidated. Don't try to requeue it. 818 * been invalidated. Don't try to requeue it.
809 * 819 *
@@ -825,19 +835,20 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
825 /* Call was successful, now wait for client callback */ 835 /* Call was successful, now wait for client callback */
826 timeout = 60 * HZ; 836 timeout = 60 * HZ;
827 } 837 }
828 nlmsvc_insert_block(block, timeout); 838 nlmsvc_insert_block_locked(block, timeout);
829 svc_wake_up(block->b_daemon); 839 svc_wake_up(block->b_daemon);
830out: 840out:
831 unlock_kernel(); 841 spin_unlock(&nlm_blocked_lock);
832} 842}
833 843
844/*
845 * FIXME: nlmsvc_release_block() grabs a mutex. This is not allowed for an
846 * .rpc_release rpc_call_op
847 */
834static void nlmsvc_grant_release(void *data) 848static void nlmsvc_grant_release(void *data)
835{ 849{
836 struct nlm_rqst *call = data; 850 struct nlm_rqst *call = data;
837
838 lock_kernel();
839 nlmsvc_release_block(call->a_block); 851 nlmsvc_release_block(call->a_block);
840 unlock_kernel();
841} 852}
842 853
843static const struct rpc_call_ops nlmsvc_grant_ops = { 854static const struct rpc_call_ops nlmsvc_grant_ops = {
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 0f2ab741ae7c..c3069f38d602 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -260,9 +260,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
260 260
261static void nlmsvc_callback_release(void *data) 261static void nlmsvc_callback_release(void *data)
262{ 262{
263 lock_kernel();
264 nlm_release_call(data); 263 nlm_release_call(data);
265 unlock_kernel();
266} 264}
267 265
268static const struct rpc_call_ops nlmsvc_callback_ops = { 266static const struct rpc_call_ops nlmsvc_callback_ops = {
diff --git a/fs/locks.c b/fs/locks.c
index 8b2b6ad56a09..4de3a2666810 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2109,7 +2109,7 @@ EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2109#include <linux/seq_file.h> 2109#include <linux/seq_file.h>
2110 2110
2111static void lock_get_status(struct seq_file *f, struct file_lock *fl, 2111static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2112 int id, char *pfx) 2112 loff_t id, char *pfx)
2113{ 2113{
2114 struct inode *inode = NULL; 2114 struct inode *inode = NULL;
2115 unsigned int fl_pid; 2115 unsigned int fl_pid;
@@ -2122,7 +2122,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2122 if (fl->fl_file != NULL) 2122 if (fl->fl_file != NULL)
2123 inode = fl->fl_file->f_path.dentry->d_inode; 2123 inode = fl->fl_file->f_path.dentry->d_inode;
2124 2124
2125 seq_printf(f, "%d:%s ", id, pfx); 2125 seq_printf(f, "%lld:%s ", id, pfx);
2126 if (IS_POSIX(fl)) { 2126 if (IS_POSIX(fl)) {
2127 seq_printf(f, "%6s %s ", 2127 seq_printf(f, "%6s %s ",
2128 (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", 2128 (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ",
@@ -2185,24 +2185,27 @@ static int locks_show(struct seq_file *f, void *v)
2185 2185
2186 fl = list_entry(v, struct file_lock, fl_link); 2186 fl = list_entry(v, struct file_lock, fl_link);
2187 2187
2188 lock_get_status(f, fl, (long)f->private, ""); 2188 lock_get_status(f, fl, *((loff_t *)f->private), "");
2189 2189
2190 list_for_each_entry(bfl, &fl->fl_block, fl_block) 2190 list_for_each_entry(bfl, &fl->fl_block, fl_block)
2191 lock_get_status(f, bfl, (long)f->private, " ->"); 2191 lock_get_status(f, bfl, *((loff_t *)f->private), " ->");
2192 2192
2193 f->private++;
2194 return 0; 2193 return 0;
2195} 2194}
2196 2195
2197static void *locks_start(struct seq_file *f, loff_t *pos) 2196static void *locks_start(struct seq_file *f, loff_t *pos)
2198{ 2197{
2198 loff_t *p = f->private;
2199
2199 lock_flocks(); 2200 lock_flocks();
2200 f->private = (void *)1; 2201 *p = (*pos + 1);
2201 return seq_list_start(&file_lock_list, *pos); 2202 return seq_list_start(&file_lock_list, *pos);
2202} 2203}
2203 2204
2204static void *locks_next(struct seq_file *f, void *v, loff_t *pos) 2205static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
2205{ 2206{
2207 loff_t *p = f->private;
2208 ++*p;
2206 return seq_list_next(v, &file_lock_list, pos); 2209 return seq_list_next(v, &file_lock_list, pos);
2207} 2210}
2208 2211
@@ -2220,14 +2223,14 @@ static const struct seq_operations locks_seq_operations = {
2220 2223
2221static int locks_open(struct inode *inode, struct file *filp) 2224static int locks_open(struct inode *inode, struct file *filp)
2222{ 2225{
2223 return seq_open(filp, &locks_seq_operations); 2226 return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t));
2224} 2227}
2225 2228
2226static const struct file_operations proc_locks_operations = { 2229static const struct file_operations proc_locks_operations = {
2227 .open = locks_open, 2230 .open = locks_open,
2228 .read = seq_read, 2231 .read = seq_read,
2229 .llseek = seq_lseek, 2232 .llseek = seq_lseek,
2230 .release = seq_release, 2233 .release = seq_release_private,
2231}; 2234};
2232 2235
2233static int __init proc_locks_init(void) 2236static int __init proc_locks_init(void)
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index b950415d7c43..fd667652c502 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -77,13 +77,17 @@ config NFS_V4
77 77
78config NFS_V4_1 78config NFS_V4_1
79 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" 79 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
80 depends on NFS_V4 && EXPERIMENTAL 80 depends on NFS_FS && NFS_V4 && EXPERIMENTAL
81 select PNFS_FILE_LAYOUT
81 help 82 help
82 This option enables support for minor version 1 of the NFSv4 protocol 83 This option enables support for minor version 1 of the NFSv4 protocol
83 (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. 84 (RFC 5661) in the kernel's NFS client.
84 85
85 If unsure, say N. 86 If unsure, say N.
86 87
88config PNFS_FILE_LAYOUT
89 tristate
90
87config ROOT_NFS 91config ROOT_NFS
88 bool "Root file system on NFS" 92 bool "Root file system on NFS"
89 depends on NFS_FS=y && IP_PNP 93 depends on NFS_FS=y && IP_PNP
@@ -118,3 +122,14 @@ config NFS_USE_KERNEL_DNS
118 select DNS_RESOLVER 122 select DNS_RESOLVER
119 select KEYS 123 select KEYS
120 default y 124 default y
125
126config NFS_USE_NEW_IDMAPPER
127 bool "Use the new idmapper upcall routine"
128 depends on NFS_V4 && KEYS
129 help
130 Say Y here if you want NFS to use the new idmapper upcall functions.
131 You will need /sbin/request-key (usually provided by the keyutils
132 package). For details, read
133 <file:Documentation/filesystems/nfs/idmapper.txt>.
134
135 If you are unsure, say N.
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index da7fda639eac..4776ff9e3814 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
15 delegation.o idmap.o \ 15 delegation.o idmap.o \
16 callback.o callback_xdr.o callback_proc.o \ 16 callback.o callback_xdr.o callback_proc.o \
17 nfs4namespace.o 17 nfs4namespace.o
18nfs-$(CONFIG_NFS_V4_1) += pnfs.o
18nfs-$(CONFIG_SYSCTL) += sysctl.o 19nfs-$(CONFIG_SYSCTL) += sysctl.o
19nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o 20nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
21
22obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
23nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index e17b49e2eabd..aeec017fe814 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -109,7 +109,7 @@ nfs4_callback_up(struct svc_serv *serv)
109{ 109{
110 int ret; 110 int ret;
111 111
112 ret = svc_create_xprt(serv, "tcp", PF_INET, 112 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
113 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 113 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
114 if (ret <= 0) 114 if (ret <= 0)
115 goto out_err; 115 goto out_err;
@@ -117,7 +117,7 @@ nfs4_callback_up(struct svc_serv *serv)
117 dprintk("NFS: Callback listener port = %u (af %u)\n", 117 dprintk("NFS: Callback listener port = %u (af %u)\n",
118 nfs_callback_tcpport, PF_INET); 118 nfs_callback_tcpport, PF_INET);
119 119
120 ret = svc_create_xprt(serv, "tcp", PF_INET6, 120 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
121 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); 121 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
122 if (ret > 0) { 122 if (ret > 0) {
123 nfs_callback_tcpport6 = ret; 123 nfs_callback_tcpport6 = ret;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 930d10fecdaf..2950fca0c61b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -118,11 +118,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n
118 if (delegation == NULL) 118 if (delegation == NULL)
119 return 0; 119 return 0;
120 120
121 /* seqid is 4-bytes long */ 121 if (stateid->stateid.seqid != 0)
122 if (((u32 *) &stateid->data)[0] != 0)
123 return 0; 122 return 0;
124 if (memcmp(&delegation->stateid.data[4], &stateid->data[4], 123 if (memcmp(&delegation->stateid.stateid.other,
125 sizeof(stateid->data)-4)) 124 &stateid->stateid.other,
125 NFS4_STATEID_OTHER_SIZE))
126 return 0; 126 return 0;
127 127
128 return 1; 128 return 1;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index e7340729af89..0870d0d4efc0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -48,6 +48,7 @@
48#include "iostat.h" 48#include "iostat.h"
49#include "internal.h" 49#include "internal.h"
50#include "fscache.h" 50#include "fscache.h"
51#include "pnfs.h"
51 52
52#define NFSDBG_FACILITY NFSDBG_CLIENT 53#define NFSDBG_FACILITY NFSDBG_CLIENT
53 54
@@ -155,7 +156,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
155 cred = rpc_lookup_machine_cred(); 156 cred = rpc_lookup_machine_cred();
156 if (!IS_ERR(cred)) 157 if (!IS_ERR(cred))
157 clp->cl_machine_cred = cred; 158 clp->cl_machine_cred = cred;
158 159#if defined(CONFIG_NFS_V4_1)
160 INIT_LIST_HEAD(&clp->cl_layouts);
161#endif
159 nfs_fscache_get_client_cookie(clp); 162 nfs_fscache_get_client_cookie(clp);
160 163
161 return clp; 164 return clp;
@@ -252,6 +255,7 @@ void nfs_put_client(struct nfs_client *clp)
252 nfs_free_client(clp); 255 nfs_free_client(clp);
253 } 256 }
254} 257}
258EXPORT_SYMBOL_GPL(nfs_put_client);
255 259
256#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 260#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
257/* 261/*
@@ -601,6 +605,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
601{ 605{
602 struct rpc_clnt *clnt = NULL; 606 struct rpc_clnt *clnt = NULL;
603 struct rpc_create_args args = { 607 struct rpc_create_args args = {
608 .net = &init_net,
604 .protocol = clp->cl_proto, 609 .protocol = clp->cl_proto,
605 .address = (struct sockaddr *)&clp->cl_addr, 610 .address = (struct sockaddr *)&clp->cl_addr,
606 .addrsize = clp->cl_addrlen, 611 .addrsize = clp->cl_addrlen,
@@ -635,7 +640,8 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
635 */ 640 */
636static void nfs_destroy_server(struct nfs_server *server) 641static void nfs_destroy_server(struct nfs_server *server)
637{ 642{
638 if (!(server->flags & NFS_MOUNT_NONLM)) 643 if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) ||
644 !(server->flags & NFS_MOUNT_LOCAL_FCNTL))
639 nlmclnt_done(server->nlm_host); 645 nlmclnt_done(server->nlm_host);
640} 646}
641 647
@@ -657,7 +663,8 @@ static int nfs_start_lockd(struct nfs_server *server)
657 663
658 if (nlm_init.nfs_version > 3) 664 if (nlm_init.nfs_version > 3)
659 return 0; 665 return 0;
660 if (server->flags & NFS_MOUNT_NONLM) 666 if ((server->flags & NFS_MOUNT_LOCAL_FLOCK) &&
667 (server->flags & NFS_MOUNT_LOCAL_FCNTL))
661 return 0; 668 return 0;
662 669
663 switch (clp->cl_proto) { 670 switch (clp->cl_proto) {
@@ -898,11 +905,13 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
898 if (server->wsize > NFS_MAX_FILE_IO_SIZE) 905 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
899 server->wsize = NFS_MAX_FILE_IO_SIZE; 906 server->wsize = NFS_MAX_FILE_IO_SIZE;
900 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 907 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
908 set_pnfs_layoutdriver(server, fsinfo->layouttype);
909
901 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); 910 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
902 911
903 server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); 912 server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
904 if (server->dtsize > PAGE_CACHE_SIZE) 913 if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES)
905 server->dtsize = PAGE_CACHE_SIZE; 914 server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES;
906 if (server->dtsize > server->rsize) 915 if (server->dtsize > server->rsize)
907 server->dtsize = server->rsize; 916 server->dtsize = server->rsize;
908 917
@@ -913,6 +922,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *
913 922
914 server->maxfilesize = fsinfo->maxfilesize; 923 server->maxfilesize = fsinfo->maxfilesize;
915 924
925 server->time_delta = fsinfo->time_delta;
926
916 /* We're airborne Set socket buffersize */ 927 /* We're airborne Set socket buffersize */
917 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); 928 rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
918} 929}
@@ -935,6 +946,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
935 } 946 }
936 947
937 fsinfo.fattr = fattr; 948 fsinfo.fattr = fattr;
949 fsinfo.layouttype = 0;
938 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); 950 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
939 if (error < 0) 951 if (error < 0)
940 goto out_error; 952 goto out_error;
@@ -1017,6 +1029,7 @@ void nfs_free_server(struct nfs_server *server)
1017{ 1029{
1018 dprintk("--> nfs_free_server()\n"); 1030 dprintk("--> nfs_free_server()\n");
1019 1031
1032 unset_pnfs_layoutdriver(server);
1020 spin_lock(&nfs_client_lock); 1033 spin_lock(&nfs_client_lock);
1021 list_del(&server->client_link); 1034 list_del(&server->client_link);
1022 list_del(&server->master_link); 1035 list_del(&server->master_link);
@@ -1356,8 +1369,9 @@ static int nfs4_init_server(struct nfs_server *server,
1356 1369
1357 /* Initialise the client representation from the mount data */ 1370 /* Initialise the client representation from the mount data */
1358 server->flags = data->flags; 1371 server->flags = data->flags;
1359 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| 1372 server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
1360 NFS_CAP_POSIX_LOCK; 1373 if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
1374 server->caps |= NFS_CAP_READDIRPLUS;
1361 server->options = data->options; 1375 server->options = data->options;
1362 1376
1363 /* Get a client record */ 1377 /* Get a client record */
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 0fac7fea18ef..07ac3847e562 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -33,11 +33,12 @@
33#include <linux/namei.h> 33#include <linux/namei.h>
34#include <linux/mount.h> 34#include <linux/mount.h>
35#include <linux/sched.h> 35#include <linux/sched.h>
36#include <linux/vmalloc.h>
36 37
37#include "nfs4_fs.h"
38#include "delegation.h" 38#include "delegation.h"
39#include "iostat.h" 39#include "iostat.h"
40#include "internal.h" 40#include "internal.h"
41#include "fscache.h"
41 42
42/* #define NFS_DEBUG_VERBOSE 1 */ 43/* #define NFS_DEBUG_VERBOSE 1 */
43 44
@@ -55,6 +56,7 @@ static int nfs_rename(struct inode *, struct dentry *,
55 struct inode *, struct dentry *); 56 struct inode *, struct dentry *);
56static int nfs_fsync_dir(struct file *, int); 57static int nfs_fsync_dir(struct file *, int);
57static loff_t nfs_llseek_dir(struct file *, loff_t, int); 58static loff_t nfs_llseek_dir(struct file *, loff_t, int);
59static int nfs_readdir_clear_array(struct page*, gfp_t);
58 60
59const struct file_operations nfs_dir_operations = { 61const struct file_operations nfs_dir_operations = {
60 .llseek = nfs_llseek_dir, 62 .llseek = nfs_llseek_dir,
@@ -80,6 +82,10 @@ const struct inode_operations nfs_dir_inode_operations = {
80 .setattr = nfs_setattr, 82 .setattr = nfs_setattr,
81}; 83};
82 84
85const struct address_space_operations nfs_dir_addr_space_ops = {
86 .releasepage = nfs_readdir_clear_array,
87};
88
83#ifdef CONFIG_NFS_V3 89#ifdef CONFIG_NFS_V3
84const struct inode_operations nfs3_dir_inode_operations = { 90const struct inode_operations nfs3_dir_inode_operations = {
85 .create = nfs_create, 91 .create = nfs_create,
@@ -104,8 +110,9 @@ const struct inode_operations nfs3_dir_inode_operations = {
104#ifdef CONFIG_NFS_V4 110#ifdef CONFIG_NFS_V4
105 111
106static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); 112static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
113static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd);
107const struct inode_operations nfs4_dir_inode_operations = { 114const struct inode_operations nfs4_dir_inode_operations = {
108 .create = nfs_create, 115 .create = nfs_open_create,
109 .lookup = nfs_atomic_lookup, 116 .lookup = nfs_atomic_lookup,
110 .link = nfs_link, 117 .link = nfs_link,
111 .unlink = nfs_unlink, 118 .unlink = nfs_unlink,
@@ -150,51 +157,197 @@ nfs_opendir(struct inode *inode, struct file *filp)
150 return res; 157 return res;
151} 158}
152 159
153typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int); 160struct nfs_cache_array_entry {
161 u64 cookie;
162 u64 ino;
163 struct qstr string;
164};
165
166struct nfs_cache_array {
167 unsigned int size;
168 int eof_index;
169 u64 last_cookie;
170 struct nfs_cache_array_entry array[0];
171};
172
173#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
174
175typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
154typedef struct { 176typedef struct {
155 struct file *file; 177 struct file *file;
156 struct page *page; 178 struct page *page;
157 unsigned long page_index; 179 unsigned long page_index;
158 __be32 *ptr;
159 u64 *dir_cookie; 180 u64 *dir_cookie;
160 loff_t current_index; 181 loff_t current_index;
161 struct nfs_entry *entry;
162 decode_dirent_t decode; 182 decode_dirent_t decode;
163 int plus; 183
164 unsigned long timestamp; 184 unsigned long timestamp;
165 unsigned long gencount; 185 unsigned long gencount;
166 int timestamp_valid; 186 unsigned int cache_entry_index;
187 unsigned int plus:1;
188 unsigned int eof:1;
167} nfs_readdir_descriptor_t; 189} nfs_readdir_descriptor_t;
168 190
169/* Now we cache directories properly, by stuffing the dirent 191/*
170 * data directly in the page cache. 192 * The caller is responsible for calling nfs_readdir_release_array(page)
171 *
172 * Inode invalidation due to refresh etc. takes care of
173 * _everything_, no sloppy entry flushing logic, no extraneous
174 * copying, network direct to page cache, the way it was meant
175 * to be.
176 *
177 * NOTE: Dirent information verification is done always by the
178 * page-in of the RPC reply, nowhere else, this simplies
179 * things substantially.
180 */ 193 */
181static 194static
182int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) 195struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
196{
197 if (page == NULL)
198 return ERR_PTR(-EIO);
199 return (struct nfs_cache_array *)kmap(page);
200}
201
202static
203void nfs_readdir_release_array(struct page *page)
204{
205 kunmap(page);
206}
207
208/*
209 * we are freeing strings created by nfs_add_to_readdir_array()
210 */
211static
212int nfs_readdir_clear_array(struct page *page, gfp_t mask)
213{
214 struct nfs_cache_array *array = nfs_readdir_get_array(page);
215 int i;
216 for (i = 0; i < array->size; i++)
217 kfree(array->array[i].string.name);
218 nfs_readdir_release_array(page);
219 return 0;
220}
221
222/*
223 * the caller is responsible for freeing qstr.name
224 * when called by nfs_readdir_add_to_array, the strings will be freed in
225 * nfs_clear_readdir_array()
226 */
227static
228int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
229{
230 string->len = len;
231 string->name = kmemdup(name, len, GFP_KERNEL);
232 if (string->name == NULL)
233 return -ENOMEM;
234 string->hash = full_name_hash(name, len);
235 return 0;
236}
237
238static
239int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
240{
241 struct nfs_cache_array *array = nfs_readdir_get_array(page);
242 struct nfs_cache_array_entry *cache_entry;
243 int ret;
244
245 if (IS_ERR(array))
246 return PTR_ERR(array);
247 ret = -EIO;
248 if (array->size >= MAX_READDIR_ARRAY)
249 goto out;
250
251 cache_entry = &array->array[array->size];
252 cache_entry->cookie = entry->prev_cookie;
253 cache_entry->ino = entry->ino;
254 ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
255 if (ret)
256 goto out;
257 array->last_cookie = entry->cookie;
258 if (entry->eof == 1)
259 array->eof_index = array->size;
260 array->size++;
261out:
262 nfs_readdir_release_array(page);
263 return ret;
264}
265
266static
267int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
268{
269 loff_t diff = desc->file->f_pos - desc->current_index;
270 unsigned int index;
271
272 if (diff < 0)
273 goto out_eof;
274 if (diff >= array->size) {
275 if (array->eof_index > 0)
276 goto out_eof;
277 desc->current_index += array->size;
278 return -EAGAIN;
279 }
280
281 index = (unsigned int)diff;
282 *desc->dir_cookie = array->array[index].cookie;
283 desc->cache_entry_index = index;
284 if (index == array->eof_index)
285 desc->eof = 1;
286 return 0;
287out_eof:
288 desc->eof = 1;
289 return -EBADCOOKIE;
290}
291
292static
293int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
294{
295 int i;
296 int status = -EAGAIN;
297
298 for (i = 0; i < array->size; i++) {
299 if (i == array->eof_index) {
300 desc->eof = 1;
301 status = -EBADCOOKIE;
302 }
303 if (array->array[i].cookie == *desc->dir_cookie) {
304 desc->cache_entry_index = i;
305 status = 0;
306 break;
307 }
308 }
309
310 return status;
311}
312
313static
314int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
315{
316 struct nfs_cache_array *array;
317 int status = -EBADCOOKIE;
318
319 if (desc->dir_cookie == NULL)
320 goto out;
321
322 array = nfs_readdir_get_array(desc->page);
323 if (IS_ERR(array)) {
324 status = PTR_ERR(array);
325 goto out;
326 }
327
328 if (*desc->dir_cookie == 0)
329 status = nfs_readdir_search_for_pos(array, desc);
330 else
331 status = nfs_readdir_search_for_cookie(array, desc);
332
333 nfs_readdir_release_array(desc->page);
334out:
335 return status;
336}
337
338/* Fill a page with xdr information before transferring to the cache page */
339static
340int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
341 struct nfs_entry *entry, struct file *file, struct inode *inode)
183{ 342{
184 struct file *file = desc->file;
185 struct inode *inode = file->f_path.dentry->d_inode;
186 struct rpc_cred *cred = nfs_file_cred(file); 343 struct rpc_cred *cred = nfs_file_cred(file);
187 unsigned long timestamp, gencount; 344 unsigned long timestamp, gencount;
188 int error; 345 int error;
189 346
190 dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
191 __func__, (long long)desc->entry->cookie,
192 page->index);
193
194 again: 347 again:
195 timestamp = jiffies; 348 timestamp = jiffies;
196 gencount = nfs_inc_attr_generation_counter(); 349 gencount = nfs_inc_attr_generation_counter();
197 error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, 350 error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages,
198 NFS_SERVER(inode)->dtsize, desc->plus); 351 NFS_SERVER(inode)->dtsize, desc->plus);
199 if (error < 0) { 352 if (error < 0) {
200 /* We requested READDIRPLUS, but the server doesn't grok it */ 353 /* We requested READDIRPLUS, but the server doesn't grok it */
@@ -208,190 +361,292 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
208 } 361 }
209 desc->timestamp = timestamp; 362 desc->timestamp = timestamp;
210 desc->gencount = gencount; 363 desc->gencount = gencount;
211 desc->timestamp_valid = 1; 364error:
212 SetPageUptodate(page); 365 return error;
213 /* Ensure consistent page alignment of the data.
214 * Note: assumes we have exclusive access to this mapping either
215 * through inode->i_mutex or some other mechanism.
216 */
217 if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
218 /* Should never happen */
219 nfs_zap_mapping(inode, inode->i_mapping);
220 }
221 unlock_page(page);
222 return 0;
223 error:
224 unlock_page(page);
225 return -EIO;
226} 366}
227 367
228static inline 368/* Fill in an entry based on the xdr code stored in desc->page */
229int dir_decode(nfs_readdir_descriptor_t *desc) 369static
370int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream)
230{ 371{
231 __be32 *p = desc->ptr; 372 __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus);
232 p = desc->decode(p, desc->entry, desc->plus);
233 if (IS_ERR(p)) 373 if (IS_ERR(p))
234 return PTR_ERR(p); 374 return PTR_ERR(p);
235 desc->ptr = p; 375
236 if (desc->timestamp_valid) { 376 entry->fattr->time_start = desc->timestamp;
237 desc->entry->fattr->time_start = desc->timestamp; 377 entry->fattr->gencount = desc->gencount;
238 desc->entry->fattr->gencount = desc->gencount;
239 } else
240 desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
241 return 0; 378 return 0;
242} 379}
243 380
244static inline 381static
245void dir_page_release(nfs_readdir_descriptor_t *desc) 382int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
246{ 383{
247 kunmap(desc->page); 384 struct nfs_inode *node;
248 page_cache_release(desc->page); 385 if (dentry->d_inode == NULL)
249 desc->page = NULL; 386 goto different;
250 desc->ptr = NULL; 387 node = NFS_I(dentry->d_inode);
388 if (node->fh.size != entry->fh->size)
389 goto different;
390 if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
391 goto different;
392 return 1;
393different:
394 return 0;
251} 395}
252 396
253/* 397static
254 * Given a pointer to a buffer that has already been filled by a call 398void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
255 * to readdir, find the next entry with cookie '*desc->dir_cookie'.
256 *
257 * If the end of the buffer has been reached, return -EAGAIN, if not,
258 * return the offset within the buffer of the next entry to be
259 * read.
260 */
261static inline
262int find_dirent(nfs_readdir_descriptor_t *desc)
263{ 399{
264 struct nfs_entry *entry = desc->entry; 400 struct qstr filename = {
265 int loop_count = 0, 401 .len = entry->len,
266 status; 402 .name = entry->name,
403 };
404 struct dentry *dentry;
405 struct dentry *alias;
406 struct inode *dir = parent->d_inode;
407 struct inode *inode;
267 408
268 while((status = dir_decode(desc)) == 0) { 409 if (filename.name[0] == '.') {
269 dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", 410 if (filename.len == 1)
270 __func__, (unsigned long long)entry->cookie); 411 return;
271 if (entry->prev_cookie == *desc->dir_cookie) 412 if (filename.len == 2 && filename.name[1] == '.')
272 break; 413 return;
273 if (loop_count++ > 200) { 414 }
274 loop_count = 0; 415 filename.hash = full_name_hash(filename.name, filename.len);
275 schedule(); 416
417 dentry = d_lookup(parent, &filename);
418 if (dentry != NULL) {
419 if (nfs_same_file(dentry, entry)) {
420 nfs_refresh_inode(dentry->d_inode, entry->fattr);
421 goto out;
422 } else {
423 d_drop(dentry);
424 dput(dentry);
276 } 425 }
277 } 426 }
278 return status; 427
428 dentry = d_alloc(parent, &filename);
429 if (dentry == NULL)
430 return;
431
432 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
433 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
434 if (IS_ERR(inode))
435 goto out;
436
437 alias = d_materialise_unique(dentry, inode);
438 if (IS_ERR(alias))
439 goto out;
440 else if (alias) {
441 nfs_set_verifier(alias, nfs_save_change_attribute(dir));
442 dput(alias);
443 } else
444 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
445
446out:
447 dput(dentry);
448}
449
450/* Perform conversion from xdr to cache array */
451static
452void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
453 void *xdr_page, struct page *page, unsigned int buflen)
454{
455 struct xdr_stream stream;
456 struct xdr_buf buf;
457 __be32 *ptr = xdr_page;
458 int status;
459 struct nfs_cache_array *array;
460
461 buf.head->iov_base = xdr_page;
462 buf.head->iov_len = buflen;
463 buf.tail->iov_len = 0;
464 buf.page_base = 0;
465 buf.page_len = 0;
466 buf.buflen = buf.head->iov_len;
467 buf.len = buf.head->iov_len;
468
469 xdr_init_decode(&stream, &buf, ptr);
470
471
472 do {
473 status = xdr_decode(desc, entry, &stream);
474 if (status != 0)
475 break;
476
477 if (nfs_readdir_add_to_array(entry, page) == -1)
478 break;
479 if (desc->plus == 1)
480 nfs_prime_dcache(desc->file->f_path.dentry, entry);
481 } while (!entry->eof);
482
483 if (status == -EBADCOOKIE && entry->eof) {
484 array = nfs_readdir_get_array(page);
485 array->eof_index = array->size - 1;
486 status = 0;
487 nfs_readdir_release_array(page);
488 }
489}
490
491static
492void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages)
493{
494 unsigned int i;
495 for (i = 0; i < npages; i++)
496 put_page(pages[i]);
497}
498
499static
500void nfs_readdir_free_large_page(void *ptr, struct page **pages,
501 unsigned int npages)
502{
503 vm_unmap_ram(ptr, npages);
504 nfs_readdir_free_pagearray(pages, npages);
279} 505}
280 506
281/* 507/*
282 * Given a pointer to a buffer that has already been filled by a call 508 * nfs_readdir_large_page will allocate pages that must be freed with a call
283 * to readdir, find the entry at offset 'desc->file->f_pos'. 509 * to nfs_readdir_free_large_page
284 *
285 * If the end of the buffer has been reached, return -EAGAIN, if not,
286 * return the offset within the buffer of the next entry to be
287 * read.
288 */ 510 */
289static inline 511static
290int find_dirent_index(nfs_readdir_descriptor_t *desc) 512void *nfs_readdir_large_page(struct page **pages, unsigned int npages)
291{ 513{
292 struct nfs_entry *entry = desc->entry; 514 void *ptr;
293 int loop_count = 0, 515 unsigned int i;
294 status; 516
517 for (i = 0; i < npages; i++) {
518 struct page *page = alloc_page(GFP_KERNEL);
519 if (page == NULL)
520 goto out_freepages;
521 pages[i] = page;
522 }
295 523
296 for(;;) { 524 ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL);
297 status = dir_decode(desc); 525 if (!IS_ERR_OR_NULL(ptr))
298 if (status) 526 return ptr;
299 break; 527out_freepages:
528 nfs_readdir_free_pagearray(pages, i);
529 return NULL;
530}
531
532static
533int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
534{
535 struct page *pages[NFS_MAX_READDIR_PAGES];
536 void *pages_ptr = NULL;
537 struct nfs_entry entry;
538 struct file *file = desc->file;
539 struct nfs_cache_array *array;
540 int status = 0;
541 unsigned int array_size = ARRAY_SIZE(pages);
542
543 entry.prev_cookie = 0;
544 entry.cookie = *desc->dir_cookie;
545 entry.eof = 0;
546 entry.fh = nfs_alloc_fhandle();
547 entry.fattr = nfs_alloc_fattr();
548 if (entry.fh == NULL || entry.fattr == NULL)
549 goto out;
300 550
301 dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", 551 array = nfs_readdir_get_array(page);
302 (unsigned long long)entry->cookie, desc->current_index); 552 memset(array, 0, sizeof(struct nfs_cache_array));
553 array->eof_index = -1;
303 554
304 if (desc->file->f_pos == desc->current_index) { 555 pages_ptr = nfs_readdir_large_page(pages, array_size);
305 *desc->dir_cookie = entry->cookie; 556 if (!pages_ptr)
557 goto out_release_array;
558 do {
559 status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
560
561 if (status < 0)
306 break; 562 break;
307 } 563 nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE);
308 desc->current_index++; 564 } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY);
309 if (loop_count++ > 200) { 565
310 loop_count = 0; 566 nfs_readdir_free_large_page(pages_ptr, pages, array_size);
311 schedule(); 567out_release_array:
312 } 568 nfs_readdir_release_array(page);
313 } 569out:
570 nfs_free_fattr(entry.fattr);
571 nfs_free_fhandle(entry.fh);
314 return status; 572 return status;
315} 573}
316 574
317/* 575/*
318 * Find the given page, and call find_dirent() or find_dirent_index in 576 * Now we cache directories properly, by converting xdr information
319 * order to try to return the next entry. 577 * to an array that can be used for lookups later. This results in
578 * fewer cache pages, since we can store more information on each page.
579 * We only need to convert from xdr once so future lookups are much simpler
320 */ 580 */
321static inline 581static
322int find_dirent_page(nfs_readdir_descriptor_t *desc) 582int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
323{ 583{
324 struct inode *inode = desc->file->f_path.dentry->d_inode; 584 struct inode *inode = desc->file->f_path.dentry->d_inode;
325 struct page *page;
326 int status;
327 585
328 dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", 586 if (nfs_readdir_xdr_to_array(desc, page, inode) < 0)
329 __func__, desc->page_index, 587 goto error;
330 (long long) *desc->dir_cookie); 588 SetPageUptodate(page);
331 589
332 /* If we find the page in the page_cache, we cannot be sure 590 if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
333 * how fresh the data is, so we will ignore readdir_plus attributes. 591 /* Should never happen */
334 */ 592 nfs_zap_mapping(inode, inode->i_mapping);
335 desc->timestamp_valid = 0;
336 page = read_cache_page(inode->i_mapping, desc->page_index,
337 (filler_t *)nfs_readdir_filler, desc);
338 if (IS_ERR(page)) {
339 status = PTR_ERR(page);
340 goto out;
341 } 593 }
594 unlock_page(page);
595 return 0;
596 error:
597 unlock_page(page);
598 return -EIO;
599}
342 600
343 /* NOTE: Someone else may have changed the READDIRPLUS flag */ 601static
344 desc->page = page; 602void cache_page_release(nfs_readdir_descriptor_t *desc)
345 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ 603{
346 if (*desc->dir_cookie != 0) 604 page_cache_release(desc->page);
347 status = find_dirent(desc); 605 desc->page = NULL;
348 else 606}
349 status = find_dirent_index(desc); 607
350 if (status < 0) 608static
351 dir_page_release(desc); 609struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
352 out: 610{
353 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); 611 struct page *page;
354 return status; 612 page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
613 desc->page_index, (filler_t *)nfs_readdir_filler, desc);
614 if (IS_ERR(page))
615 desc->eof = 1;
616 return page;
355} 617}
356 618
357/* 619/*
358 * Recurse through the page cache pages, and return a 620 * Returns 0 if desc->dir_cookie was found on page desc->page_index
359 * filled nfs_entry structure of the next directory entry if possible.
360 *
361 * The target for the search is '*desc->dir_cookie' if non-0,
362 * 'desc->file->f_pos' otherwise
363 */ 621 */
622static
623int find_cache_page(nfs_readdir_descriptor_t *desc)
624{
625 int res;
626
627 desc->page = get_cache_page(desc);
628 if (IS_ERR(desc->page))
629 return PTR_ERR(desc->page);
630
631 res = nfs_readdir_search_array(desc);
632 if (res == 0)
633 return 0;
634 cache_page_release(desc);
635 return res;
636}
637
638/* Search for desc->dir_cookie from the beginning of the page cache */
364static inline 639static inline
365int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) 640int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
366{ 641{
367 int loop_count = 0; 642 int res = -EAGAIN;
368 int res;
369
370 /* Always search-by-index from the beginning of the cache */
371 if (*desc->dir_cookie == 0) {
372 dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n",
373 (long long)desc->file->f_pos);
374 desc->page_index = 0;
375 desc->entry->cookie = desc->entry->prev_cookie = 0;
376 desc->entry->eof = 0;
377 desc->current_index = 0;
378 } else
379 dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n",
380 (unsigned long long)*desc->dir_cookie);
381 643
382 for (;;) { 644 while (1) {
383 res = find_dirent_page(desc); 645 res = find_cache_page(desc);
384 if (res != -EAGAIN) 646 if (res != -EAGAIN)
385 break; 647 break;
386 /* Align to beginning of next page */ 648 desc->page_index++;
387 desc->page_index ++;
388 if (loop_count++ > 200) {
389 loop_count = 0;
390 schedule();
391 }
392 } 649 }
393
394 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res);
395 return res; 650 return res;
396} 651}
397 652
@@ -400,8 +655,6 @@ static inline unsigned int dt_type(struct inode *inode)
400 return (inode->i_mode >> 12) & 15; 655 return (inode->i_mode >> 12) & 15;
401} 656}
402 657
403static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc);
404
405/* 658/*
406 * Once we've found the start of the dirent within a page: fill 'er up... 659 * Once we've found the start of the dirent within a page: fill 'er up...
407 */ 660 */
@@ -410,49 +663,36 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
410 filldir_t filldir) 663 filldir_t filldir)
411{ 664{
412 struct file *file = desc->file; 665 struct file *file = desc->file;
413 struct nfs_entry *entry = desc->entry; 666 int i = 0;
414 struct dentry *dentry = NULL; 667 int res = 0;
415 u64 fileid; 668 struct nfs_cache_array *array = NULL;
416 int loop_count = 0, 669 unsigned int d_type = DT_UNKNOWN;
417 res; 670 struct dentry *dentry = NULL;
418
419 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n",
420 (unsigned long long)entry->cookie);
421
422 for(;;) {
423 unsigned d_type = DT_UNKNOWN;
424 /* Note: entry->prev_cookie contains the cookie for
425 * retrieving the current dirent on the server */
426 fileid = entry->ino;
427
428 /* Get a dentry if we have one */
429 if (dentry != NULL)
430 dput(dentry);
431 dentry = nfs_readdir_lookup(desc);
432 671
433 /* Use readdirplus info */ 672 array = nfs_readdir_get_array(desc->page);
434 if (dentry != NULL && dentry->d_inode != NULL) {
435 d_type = dt_type(dentry->d_inode);
436 fileid = NFS_FILEID(dentry->d_inode);
437 }
438 673
439 res = filldir(dirent, entry->name, entry->len, 674 for (i = desc->cache_entry_index; i < array->size; i++) {
440 file->f_pos, nfs_compat_user_ino64(fileid), 675 d_type = DT_UNKNOWN;
441 d_type); 676
677 res = filldir(dirent, array->array[i].string.name,
678 array->array[i].string.len, file->f_pos,
679 nfs_compat_user_ino64(array->array[i].ino), d_type);
442 if (res < 0) 680 if (res < 0)
443 break; 681 break;
444 file->f_pos++; 682 file->f_pos++;
445 *desc->dir_cookie = entry->cookie; 683 desc->cache_entry_index = i;
446 if (dir_decode(desc) != 0) { 684 if (i < (array->size-1))
447 desc->page_index ++; 685 *desc->dir_cookie = array->array[i+1].cookie;
686 else
687 *desc->dir_cookie = array->last_cookie;
688 if (i == array->eof_index) {
689 desc->eof = 1;
448 break; 690 break;
449 } 691 }
450 if (loop_count++ > 200) {
451 loop_count = 0;
452 schedule();
453 }
454 } 692 }
455 dir_page_release(desc); 693
694 nfs_readdir_release_array(desc->page);
695 cache_page_release(desc);
456 if (dentry != NULL) 696 if (dentry != NULL)
457 dput(dentry); 697 dput(dentry);
458 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", 698 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
@@ -476,12 +716,9 @@ static inline
476int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, 716int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
477 filldir_t filldir) 717 filldir_t filldir)
478{ 718{
479 struct file *file = desc->file;
480 struct inode *inode = file->f_path.dentry->d_inode;
481 struct rpc_cred *cred = nfs_file_cred(file);
482 struct page *page = NULL; 719 struct page *page = NULL;
483 int status; 720 int status;
484 unsigned long timestamp, gencount; 721 struct inode *inode = desc->file->f_path.dentry->d_inode;
485 722
486 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", 723 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
487 (unsigned long long)*desc->dir_cookie); 724 (unsigned long long)*desc->dir_cookie);
@@ -491,38 +728,22 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
491 status = -ENOMEM; 728 status = -ENOMEM;
492 goto out; 729 goto out;
493 } 730 }
494 timestamp = jiffies; 731
495 gencount = nfs_inc_attr_generation_counter(); 732 if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
496 status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred,
497 *desc->dir_cookie, page,
498 NFS_SERVER(inode)->dtsize,
499 desc->plus);
500 desc->page = page;
501 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
502 if (status >= 0) {
503 desc->timestamp = timestamp;
504 desc->gencount = gencount;
505 desc->timestamp_valid = 1;
506 if ((status = dir_decode(desc)) == 0)
507 desc->entry->prev_cookie = *desc->dir_cookie;
508 } else
509 status = -EIO; 733 status = -EIO;
510 if (status < 0)
511 goto out_release; 734 goto out_release;
735 }
512 736
737 desc->page_index = 0;
738 desc->page = page;
513 status = nfs_do_filldir(desc, dirent, filldir); 739 status = nfs_do_filldir(desc, dirent, filldir);
514 740
515 /* Reset read descriptor so it searches the page cache from
516 * the start upon the next call to readdir_search_pagecache() */
517 desc->page_index = 0;
518 desc->entry->cookie = desc->entry->prev_cookie = 0;
519 desc->entry->eof = 0;
520 out: 741 out:
521 dfprintk(DIRCACHE, "NFS: %s: returns %d\n", 742 dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
522 __func__, status); 743 __func__, status);
523 return status; 744 return status;
524 out_release: 745 out_release:
525 dir_page_release(desc); 746 cache_page_release(desc);
526 goto out; 747 goto out;
527} 748}
528 749
@@ -536,7 +757,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
536 struct inode *inode = dentry->d_inode; 757 struct inode *inode = dentry->d_inode;
537 nfs_readdir_descriptor_t my_desc, 758 nfs_readdir_descriptor_t my_desc,
538 *desc = &my_desc; 759 *desc = &my_desc;
539 struct nfs_entry my_entry;
540 int res = -ENOMEM; 760 int res = -ENOMEM;
541 761
542 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 762 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -557,26 +777,17 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
557 desc->decode = NFS_PROTO(inode)->decode_dirent; 777 desc->decode = NFS_PROTO(inode)->decode_dirent;
558 desc->plus = NFS_USE_READDIRPLUS(inode); 778 desc->plus = NFS_USE_READDIRPLUS(inode);
559 779
560 my_entry.cookie = my_entry.prev_cookie = 0;
561 my_entry.eof = 0;
562 my_entry.fh = nfs_alloc_fhandle();
563 my_entry.fattr = nfs_alloc_fattr();
564 if (my_entry.fh == NULL || my_entry.fattr == NULL)
565 goto out_alloc_failed;
566
567 desc->entry = &my_entry;
568
569 nfs_block_sillyrename(dentry); 780 nfs_block_sillyrename(dentry);
570 res = nfs_revalidate_mapping(inode, filp->f_mapping); 781 res = nfs_revalidate_mapping(inode, filp->f_mapping);
571 if (res < 0) 782 if (res < 0)
572 goto out; 783 goto out;
573 784
574 while(!desc->entry->eof) { 785 while (desc->eof != 1) {
575 res = readdir_search_pagecache(desc); 786 res = readdir_search_pagecache(desc);
576 787
577 if (res == -EBADCOOKIE) { 788 if (res == -EBADCOOKIE) {
578 /* This means either end of directory */ 789 /* This means either end of directory */
579 if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { 790 if (*desc->dir_cookie && desc->eof == 0) {
580 /* Or that the server has 'lost' a cookie */ 791 /* Or that the server has 'lost' a cookie */
581 res = uncached_readdir(desc, dirent, filldir); 792 res = uncached_readdir(desc, dirent, filldir);
582 if (res >= 0) 793 if (res >= 0)
@@ -588,8 +799,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
588 if (res == -ETOOSMALL && desc->plus) { 799 if (res == -ETOOSMALL && desc->plus) {
589 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 800 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
590 nfs_zap_caches(inode); 801 nfs_zap_caches(inode);
802 desc->page_index = 0;
591 desc->plus = 0; 803 desc->plus = 0;
592 desc->entry->eof = 0; 804 desc->eof = 0;
593 continue; 805 continue;
594 } 806 }
595 if (res < 0) 807 if (res < 0)
@@ -605,9 +817,6 @@ out:
605 nfs_unblock_sillyrename(dentry); 817 nfs_unblock_sillyrename(dentry);
606 if (res > 0) 818 if (res > 0)
607 res = 0; 819 res = 0;
608out_alloc_failed:
609 nfs_free_fattr(my_entry.fattr);
610 nfs_free_fhandle(my_entry.fh);
611 dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", 820 dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n",
612 dentry->d_parent->d_name.name, dentry->d_name.name, 821 dentry->d_parent->d_name.name, dentry->d_name.name,
613 res); 822 res);
@@ -1029,10 +1238,63 @@ static int is_atomic_open(struct nameidata *nd)
1029 return 1; 1238 return 1;
1030} 1239}
1031 1240
1241static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd)
1242{
1243 struct path path = {
1244 .mnt = nd->path.mnt,
1245 .dentry = dentry,
1246 };
1247 struct nfs_open_context *ctx;
1248 struct rpc_cred *cred;
1249 fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
1250
1251 cred = rpc_lookup_cred();
1252 if (IS_ERR(cred))
1253 return ERR_CAST(cred);
1254 ctx = alloc_nfs_open_context(&path, cred, fmode);
1255 put_rpccred(cred);
1256 if (ctx == NULL)
1257 return ERR_PTR(-ENOMEM);
1258 return ctx;
1259}
1260
1261static int do_open(struct inode *inode, struct file *filp)
1262{
1263 nfs_fscache_set_inode_cookie(inode, filp);
1264 return 0;
1265}
1266
1267static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx)
1268{
1269 struct file *filp;
1270 int ret = 0;
1271
1272 /* If the open_intent is for execute, we have an extra check to make */
1273 if (ctx->mode & FMODE_EXEC) {
1274 ret = nfs_may_open(ctx->path.dentry->d_inode,
1275 ctx->cred,
1276 nd->intent.open.flags);
1277 if (ret < 0)
1278 goto out;
1279 }
1280 filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open);
1281 if (IS_ERR(filp))
1282 ret = PTR_ERR(filp);
1283 else
1284 nfs_file_set_open_context(filp, ctx);
1285out:
1286 put_nfs_open_context(ctx);
1287 return ret;
1288}
1289
1032static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 1290static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1033{ 1291{
1292 struct nfs_open_context *ctx;
1293 struct iattr attr;
1034 struct dentry *res = NULL; 1294 struct dentry *res = NULL;
1035 int error; 1295 struct inode *inode;
1296 int open_flags;
1297 int err;
1036 1298
1037 dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", 1299 dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n",
1038 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1300 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1054,13 +1316,32 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1054 goto out; 1316 goto out;
1055 } 1317 }
1056 1318
1319 ctx = nameidata_to_nfs_open_context(dentry, nd);
1320 res = ERR_CAST(ctx);
1321 if (IS_ERR(ctx))
1322 goto out;
1323
1324 open_flags = nd->intent.open.flags;
1325 if (nd->flags & LOOKUP_CREATE) {
1326 attr.ia_mode = nd->intent.open.create_mode;
1327 attr.ia_valid = ATTR_MODE;
1328 if (!IS_POSIXACL(dir))
1329 attr.ia_mode &= ~current_umask();
1330 } else {
1331 open_flags &= ~(O_EXCL | O_CREAT);
1332 attr.ia_valid = 0;
1333 }
1334
1057 /* Open the file on the server */ 1335 /* Open the file on the server */
1058 res = nfs4_atomic_open(dir, dentry, nd); 1336 nfs_block_sillyrename(dentry->d_parent);
1059 if (IS_ERR(res)) { 1337 inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr);
1060 error = PTR_ERR(res); 1338 if (IS_ERR(inode)) {
1061 switch (error) { 1339 nfs_unblock_sillyrename(dentry->d_parent);
1340 put_nfs_open_context(ctx);
1341 switch (PTR_ERR(inode)) {
1062 /* Make a negative dentry */ 1342 /* Make a negative dentry */
1063 case -ENOENT: 1343 case -ENOENT:
1344 d_add(dentry, NULL);
1064 res = NULL; 1345 res = NULL;
1065 goto out; 1346 goto out;
1066 /* This turned out not to be a regular file */ 1347 /* This turned out not to be a regular file */
@@ -1072,11 +1353,25 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1072 goto no_open; 1353 goto no_open;
1073 /* case -EINVAL: */ 1354 /* case -EINVAL: */
1074 default: 1355 default:
1356 res = ERR_CAST(inode);
1075 goto out; 1357 goto out;
1076 } 1358 }
1077 } else if (res != NULL) 1359 }
1360 res = d_add_unique(dentry, inode);
1361 nfs_unblock_sillyrename(dentry->d_parent);
1362 if (res != NULL) {
1363 dput(ctx->path.dentry);
1364 ctx->path.dentry = dget(res);
1078 dentry = res; 1365 dentry = res;
1366 }
1367 err = nfs_intent_set_file(nd, ctx);
1368 if (err < 0) {
1369 if (res != NULL)
1370 dput(res);
1371 return ERR_PTR(err);
1372 }
1079out: 1373out:
1374 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1080 return res; 1375 return res;
1081no_open: 1376no_open:
1082 return nfs_lookup(dir, dentry, nd); 1377 return nfs_lookup(dir, dentry, nd);
@@ -1087,12 +1382,15 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1087 struct dentry *parent = NULL; 1382 struct dentry *parent = NULL;
1088 struct inode *inode = dentry->d_inode; 1383 struct inode *inode = dentry->d_inode;
1089 struct inode *dir; 1384 struct inode *dir;
1385 struct nfs_open_context *ctx;
1090 int openflags, ret = 0; 1386 int openflags, ret = 0;
1091 1387
1092 if (!is_atomic_open(nd) || d_mountpoint(dentry)) 1388 if (!is_atomic_open(nd) || d_mountpoint(dentry))
1093 goto no_open; 1389 goto no_open;
1390
1094 parent = dget_parent(dentry); 1391 parent = dget_parent(dentry);
1095 dir = parent->d_inode; 1392 dir = parent->d_inode;
1393
1096 /* We can't create new files in nfs_open_revalidate(), so we 1394 /* We can't create new files in nfs_open_revalidate(), so we
1097 * optimize away revalidation of negative dentries. 1395 * optimize away revalidation of negative dentries.
1098 */ 1396 */
@@ -1112,99 +1410,96 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1112 /* We can't create new files, or truncate existing ones here */ 1410 /* We can't create new files, or truncate existing ones here */
1113 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); 1411 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
1114 1412
1413 ctx = nameidata_to_nfs_open_context(dentry, nd);
1414 ret = PTR_ERR(ctx);
1415 if (IS_ERR(ctx))
1416 goto out;
1115 /* 1417 /*
1116 * Note: we're not holding inode->i_mutex and so may be racing with 1418 * Note: we're not holding inode->i_mutex and so may be racing with
1117 * operations that change the directory. We therefore save the 1419 * operations that change the directory. We therefore save the
1118 * change attribute *before* we do the RPC call. 1420 * change attribute *before* we do the RPC call.
1119 */ 1421 */
1120 ret = nfs4_open_revalidate(dir, dentry, openflags, nd); 1422 inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL);
1423 if (IS_ERR(inode)) {
1424 ret = PTR_ERR(inode);
1425 switch (ret) {
1426 case -EPERM:
1427 case -EACCES:
1428 case -EDQUOT:
1429 case -ENOSPC:
1430 case -EROFS:
1431 goto out_put_ctx;
1432 default:
1433 goto out_drop;
1434 }
1435 }
1436 iput(inode);
1437 if (inode != dentry->d_inode)
1438 goto out_drop;
1439
1440 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1441 ret = nfs_intent_set_file(nd, ctx);
1442 if (ret >= 0)
1443 ret = 1;
1121out: 1444out:
1122 dput(parent); 1445 dput(parent);
1123 if (!ret)
1124 d_drop(dentry);
1125 return ret; 1446 return ret;
1447out_drop:
1448 d_drop(dentry);
1449 ret = 0;
1450out_put_ctx:
1451 put_nfs_open_context(ctx);
1452 goto out;
1453
1126no_open_dput: 1454no_open_dput:
1127 dput(parent); 1455 dput(parent);
1128no_open: 1456no_open:
1129 return nfs_lookup_revalidate(dentry, nd); 1457 return nfs_lookup_revalidate(dentry, nd);
1130} 1458}
1131#endif /* CONFIG_NFSV4 */
1132 1459
1133static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) 1460static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
1461 struct nameidata *nd)
1134{ 1462{
1135 struct dentry *parent = desc->file->f_path.dentry; 1463 struct nfs_open_context *ctx = NULL;
1136 struct inode *dir = parent->d_inode; 1464 struct iattr attr;
1137 struct nfs_entry *entry = desc->entry; 1465 int error;
1138 struct dentry *dentry, *alias; 1466 int open_flags = 0;
1139 struct qstr name = {
1140 .name = entry->name,
1141 .len = entry->len,
1142 };
1143 struct inode *inode;
1144 unsigned long verf = nfs_save_change_attribute(dir);
1145 1467
1146 switch (name.len) { 1468 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1147 case 2: 1469 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
1148 if (name.name[0] == '.' && name.name[1] == '.')
1149 return dget_parent(parent);
1150 break;
1151 case 1:
1152 if (name.name[0] == '.')
1153 return dget(parent);
1154 }
1155 1470
1156 spin_lock(&dir->i_lock); 1471 attr.ia_mode = mode;
1157 if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { 1472 attr.ia_valid = ATTR_MODE;
1158 spin_unlock(&dir->i_lock);
1159 return NULL;
1160 }
1161 spin_unlock(&dir->i_lock);
1162 1473
1163 name.hash = full_name_hash(name.name, name.len); 1474 if ((nd->flags & LOOKUP_CREATE) != 0) {
1164 dentry = d_lookup(parent, &name); 1475 open_flags = nd->intent.open.flags;
1165 if (dentry != NULL) {
1166 /* Is this a positive dentry that matches the readdir info? */
1167 if (dentry->d_inode != NULL &&
1168 (NFS_FILEID(dentry->d_inode) == entry->ino ||
1169 d_mountpoint(dentry))) {
1170 if (!desc->plus || entry->fh->size == 0)
1171 return dentry;
1172 if (nfs_compare_fh(NFS_FH(dentry->d_inode),
1173 entry->fh) == 0)
1174 goto out_renew;
1175 }
1176 /* No, so d_drop to allow one to be created */
1177 d_drop(dentry);
1178 dput(dentry);
1179 }
1180 if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
1181 return NULL;
1182 if (name.len > NFS_SERVER(dir)->namelen)
1183 return NULL;
1184 /* Note: caller is already holding the dir->i_mutex! */
1185 dentry = d_alloc(parent, &name);
1186 if (dentry == NULL)
1187 return NULL;
1188 dentry->d_op = NFS_PROTO(dir)->dentry_ops;
1189 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
1190 if (IS_ERR(inode)) {
1191 dput(dentry);
1192 return NULL;
1193 }
1194 1476
1195 alias = d_materialise_unique(dentry, inode); 1477 ctx = nameidata_to_nfs_open_context(dentry, nd);
1196 if (alias != NULL) { 1478 error = PTR_ERR(ctx);
1197 dput(dentry); 1479 if (IS_ERR(ctx))
1198 if (IS_ERR(alias)) 1480 goto out_err_drop;
1199 return NULL;
1200 dentry = alias;
1201 } 1481 }
1202 1482
1203out_renew: 1483 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
1204 nfs_set_verifier(dentry, verf); 1484 if (error != 0)
1205 return dentry; 1485 goto out_put_ctx;
1486 if (ctx != NULL) {
1487 error = nfs_intent_set_file(nd, ctx);
1488 if (error < 0)
1489 goto out_err;
1490 }
1491 return 0;
1492out_put_ctx:
1493 if (ctx != NULL)
1494 put_nfs_open_context(ctx);
1495out_err_drop:
1496 d_drop(dentry);
1497out_err:
1498 return error;
1206} 1499}
1207 1500
1501#endif /* CONFIG_NFSV4 */
1502
1208/* 1503/*
1209 * Code common to create, mkdir, and mknod. 1504 * Code common to create, mkdir, and mknod.
1210 */ 1505 */
@@ -1258,7 +1553,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1258{ 1553{
1259 struct iattr attr; 1554 struct iattr attr;
1260 int error; 1555 int error;
1261 int open_flags = 0;
1262 1556
1263 dfprintk(VFS, "NFS: create(%s/%ld), %s\n", 1557 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1264 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1558 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1266,10 +1560,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1266 attr.ia_mode = mode; 1560 attr.ia_mode = mode;
1267 attr.ia_valid = ATTR_MODE; 1561 attr.ia_valid = ATTR_MODE;
1268 1562
1269 if ((nd->flags & LOOKUP_CREATE) != 0) 1563 error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL);
1270 open_flags = nd->intent.open.flags;
1271
1272 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd);
1273 if (error != 0) 1564 if (error != 0)
1274 goto out_err; 1565 goto out_err;
1275 return 0; 1566 return 0;
@@ -1351,76 +1642,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1351 return error; 1642 return error;
1352} 1643}
1353 1644
1354static int nfs_sillyrename(struct inode *dir, struct dentry *dentry)
1355{
1356 static unsigned int sillycounter;
1357 const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
1358 const int countersize = sizeof(sillycounter)*2;
1359 const int slen = sizeof(".nfs")+fileidsize+countersize-1;
1360 char silly[slen+1];
1361 struct qstr qsilly;
1362 struct dentry *sdentry;
1363 int error = -EIO;
1364
1365 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
1366 dentry->d_parent->d_name.name, dentry->d_name.name,
1367 atomic_read(&dentry->d_count));
1368 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
1369
1370 /*
1371 * We don't allow a dentry to be silly-renamed twice.
1372 */
1373 error = -EBUSY;
1374 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1375 goto out;
1376
1377 sprintf(silly, ".nfs%*.*Lx",
1378 fileidsize, fileidsize,
1379 (unsigned long long)NFS_FILEID(dentry->d_inode));
1380
1381 /* Return delegation in anticipation of the rename */
1382 nfs_inode_return_delegation(dentry->d_inode);
1383
1384 sdentry = NULL;
1385 do {
1386 char *suffix = silly + slen - countersize;
1387
1388 dput(sdentry);
1389 sillycounter++;
1390 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
1391
1392 dfprintk(VFS, "NFS: trying to rename %s to %s\n",
1393 dentry->d_name.name, silly);
1394
1395 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
1396 /*
1397 * N.B. Better to return EBUSY here ... it could be
1398 * dangerous to delete the file while it's in use.
1399 */
1400 if (IS_ERR(sdentry))
1401 goto out;
1402 } while(sdentry->d_inode != NULL); /* need negative lookup */
1403
1404 qsilly.name = silly;
1405 qsilly.len = strlen(silly);
1406 if (dentry->d_inode) {
1407 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1408 dir, &qsilly);
1409 nfs_mark_for_revalidate(dentry->d_inode);
1410 } else
1411 error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
1412 dir, &qsilly);
1413 if (!error) {
1414 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1415 d_move(dentry, sdentry);
1416 error = nfs_async_unlink(dir, dentry);
1417 /* If we return 0 we don't unlink */
1418 }
1419 dput(sdentry);
1420out:
1421 return error;
1422}
1423
1424/* 1645/*
1425 * Remove a file after making sure there are no pending writes, 1646 * Remove a file after making sure there are no pending writes,
1426 * and after checking that the file has only one user. 1647 * and after checking that the file has only one user.
@@ -1711,14 +1932,14 @@ static void nfs_access_free_list(struct list_head *head)
1711int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) 1932int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
1712{ 1933{
1713 LIST_HEAD(head); 1934 LIST_HEAD(head);
1714 struct nfs_inode *nfsi; 1935 struct nfs_inode *nfsi, *next;
1715 struct nfs_access_entry *cache; 1936 struct nfs_access_entry *cache;
1716 1937
1717 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) 1938 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
1718 return (nr_to_scan == 0) ? 0 : -1; 1939 return (nr_to_scan == 0) ? 0 : -1;
1719 1940
1720 spin_lock(&nfs_access_lru_lock); 1941 spin_lock(&nfs_access_lru_lock);
1721 list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { 1942 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
1722 struct inode *inode; 1943 struct inode *inode;
1723 1944
1724 if (nr_to_scan-- == 0) 1945 if (nr_to_scan-- == 0)
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index dba50a5625db..a6e711ad130f 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd,
167 return 0; 167 return 0;
168 } 168 }
169 item = container_of(h, struct nfs_dns_ent, h); 169 item = container_of(h, struct nfs_dns_ent, h);
170 ttl = (long)item->h.expiry_time - (long)get_seconds(); 170 ttl = item->h.expiry_time - seconds_since_boot();
171 if (ttl < 0) 171 if (ttl < 0)
172 ttl = 0; 172 ttl = 0;
173 173
@@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen)
239 ttl = get_expiry(&buf); 239 ttl = get_expiry(&buf);
240 if (ttl == 0) 240 if (ttl == 0)
241 goto out; 241 goto out;
242 key.h.expiry_time = ttl + get_seconds(); 242 key.h.expiry_time = ttl + seconds_since_boot();
243 243
244 ret = -ENOMEM; 244 ret = -ENOMEM;
245 item = nfs_dns_lookup(cd, &key); 245 item = nfs_dns_lookup(cd, &key);
@@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd,
301 goto out_err; 301 goto out_err;
302 ret = -ETIMEDOUT; 302 ret = -ETIMEDOUT;
303 if (!test_bit(CACHE_VALID, &(*item)->h.flags) 303 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
304 || (*item)->h.expiry_time < get_seconds() 304 || (*item)->h.expiry_time < seconds_since_boot()
305 || cd->flush_time > (*item)->h.last_refresh) 305 || cd->flush_time > (*item)->h.last_refresh)
306 goto out_put; 306 goto out_put;
307 ret = -ENOENT; 307 ret = -ENOENT;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 05bf3c0dc751..e756075637b0 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -36,6 +36,7 @@
36#include "internal.h" 36#include "internal.h"
37#include "iostat.h" 37#include "iostat.h"
38#include "fscache.h" 38#include "fscache.h"
39#include "pnfs.h"
39 40
40#define NFSDBG_FACILITY NFSDBG_FILE 41#define NFSDBG_FACILITY NFSDBG_FILE
41 42
@@ -386,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
386 file->f_path.dentry->d_name.name, 387 file->f_path.dentry->d_name.name,
387 mapping->host->i_ino, len, (long long) pos); 388 mapping->host->i_ino, len, (long long) pos);
388 389
390 pnfs_update_layout(mapping->host,
391 nfs_file_open_context(file),
392 IOMODE_RW);
393
389start: 394start:
390 /* 395 /*
391 * Prevent starvation issues if someone is doing a consistency 396 * Prevent starvation issues if someone is doing a consistency
@@ -551,7 +556,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
551 struct file *filp = vma->vm_file; 556 struct file *filp = vma->vm_file;
552 struct dentry *dentry = filp->f_path.dentry; 557 struct dentry *dentry = filp->f_path.dentry;
553 unsigned pagelen; 558 unsigned pagelen;
554 int ret = -EINVAL; 559 int ret = VM_FAULT_NOPAGE;
555 struct address_space *mapping; 560 struct address_space *mapping;
556 561
557 dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", 562 dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n",
@@ -567,21 +572,20 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
567 if (mapping != dentry->d_inode->i_mapping) 572 if (mapping != dentry->d_inode->i_mapping)
568 goto out_unlock; 573 goto out_unlock;
569 574
570 ret = 0;
571 pagelen = nfs_page_length(page); 575 pagelen = nfs_page_length(page);
572 if (pagelen == 0) 576 if (pagelen == 0)
573 goto out_unlock; 577 goto out_unlock;
574 578
575 ret = nfs_flush_incompatible(filp, page); 579 ret = VM_FAULT_LOCKED;
576 if (ret != 0) 580 if (nfs_flush_incompatible(filp, page) == 0 &&
577 goto out_unlock; 581 nfs_updatepage(filp, page, 0, pagelen) == 0)
582 goto out;
578 583
579 ret = nfs_updatepage(filp, page, 0, pagelen); 584 ret = VM_FAULT_SIGBUS;
580out_unlock: 585out_unlock:
581 if (!ret)
582 return VM_FAULT_LOCKED;
583 unlock_page(page); 586 unlock_page(page);
584 return VM_FAULT_SIGBUS; 587out:
588 return ret;
585} 589}
586 590
587static const struct vm_operations_struct nfs_file_vm_ops = { 591static const struct vm_operations_struct nfs_file_vm_ops = {
@@ -684,7 +688,8 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
684 return ret; 688 return ret;
685} 689}
686 690
687static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) 691static int
692do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
688{ 693{
689 struct inode *inode = filp->f_mapping->host; 694 struct inode *inode = filp->f_mapping->host;
690 int status = 0; 695 int status = 0;
@@ -699,7 +704,7 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
699 if (nfs_have_delegation(inode, FMODE_READ)) 704 if (nfs_have_delegation(inode, FMODE_READ))
700 goto out_noconflict; 705 goto out_noconflict;
701 706
702 if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) 707 if (is_local)
703 goto out_noconflict; 708 goto out_noconflict;
704 709
705 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 710 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
@@ -726,7 +731,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
726 return res; 731 return res;
727} 732}
728 733
729static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) 734static int
735do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
730{ 736{
731 struct inode *inode = filp->f_mapping->host; 737 struct inode *inode = filp->f_mapping->host;
732 int status; 738 int status;
@@ -741,15 +747,24 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
741 * If we're signalled while cleaning up locks on process exit, we 747 * If we're signalled while cleaning up locks on process exit, we
742 * still need to complete the unlock. 748 * still need to complete the unlock.
743 */ 749 */
744 /* Use local locking if mounted with "-onolock" */ 750 /*
745 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 751 * Use local locking if mounted with "-onolock" or with appropriate
752 * "-olocal_lock="
753 */
754 if (!is_local)
746 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 755 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
747 else 756 else
748 status = do_vfs_lock(filp, fl); 757 status = do_vfs_lock(filp, fl);
749 return status; 758 return status;
750} 759}
751 760
752static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) 761static int
762is_time_granular(struct timespec *ts) {
763 return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000));
764}
765
766static int
767do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
753{ 768{
754 struct inode *inode = filp->f_mapping->host; 769 struct inode *inode = filp->f_mapping->host;
755 int status; 770 int status;
@@ -762,20 +777,31 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
762 if (status != 0) 777 if (status != 0)
763 goto out; 778 goto out;
764 779
765 /* Use local locking if mounted with "-onolock" */ 780 /*
766 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 781 * Use local locking if mounted with "-onolock" or with appropriate
782 * "-olocal_lock="
783 */
784 if (!is_local)
767 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 785 status = NFS_PROTO(inode)->lock(filp, cmd, fl);
768 else 786 else
769 status = do_vfs_lock(filp, fl); 787 status = do_vfs_lock(filp, fl);
770 if (status < 0) 788 if (status < 0)
771 goto out; 789 goto out;
790
772 /* 791 /*
773 * Make sure we clear the cache whenever we try to get the lock. 792 * Revalidate the cache if the server has time stamps granular
793 * enough to detect subsecond changes. Otherwise, clear the
794 * cache to prevent missing any changes.
795 *
774 * This makes locking act as a cache coherency point. 796 * This makes locking act as a cache coherency point.
775 */ 797 */
776 nfs_sync_mapping(filp->f_mapping); 798 nfs_sync_mapping(filp->f_mapping);
777 if (!nfs_have_delegation(inode, FMODE_READ)) 799 if (!nfs_have_delegation(inode, FMODE_READ)) {
778 nfs_zap_caches(inode); 800 if (is_time_granular(&NFS_SERVER(inode)->time_delta))
801 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
802 else
803 nfs_zap_caches(inode);
804 }
779out: 805out:
780 return status; 806 return status;
781} 807}
@@ -787,6 +813,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
787{ 813{
788 struct inode *inode = filp->f_mapping->host; 814 struct inode *inode = filp->f_mapping->host;
789 int ret = -ENOLCK; 815 int ret = -ENOLCK;
816 int is_local = 0;
790 817
791 dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", 818 dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n",
792 filp->f_path.dentry->d_parent->d_name.name, 819 filp->f_path.dentry->d_parent->d_name.name,
@@ -800,6 +827,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
800 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) 827 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
801 goto out_err; 828 goto out_err;
802 829
830 if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
831 is_local = 1;
832
803 if (NFS_PROTO(inode)->lock_check_bounds != NULL) { 833 if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
804 ret = NFS_PROTO(inode)->lock_check_bounds(fl); 834 ret = NFS_PROTO(inode)->lock_check_bounds(fl);
805 if (ret < 0) 835 if (ret < 0)
@@ -807,11 +837,11 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
807 } 837 }
808 838
809 if (IS_GETLK(cmd)) 839 if (IS_GETLK(cmd))
810 ret = do_getlk(filp, cmd, fl); 840 ret = do_getlk(filp, cmd, fl, is_local);
811 else if (fl->fl_type == F_UNLCK) 841 else if (fl->fl_type == F_UNLCK)
812 ret = do_unlk(filp, cmd, fl); 842 ret = do_unlk(filp, cmd, fl, is_local);
813 else 843 else
814 ret = do_setlk(filp, cmd, fl); 844 ret = do_setlk(filp, cmd, fl, is_local);
815out_err: 845out_err:
816 return ret; 846 return ret;
817} 847}
@@ -821,6 +851,9 @@ out_err:
821 */ 851 */
822static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) 852static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
823{ 853{
854 struct inode *inode = filp->f_mapping->host;
855 int is_local = 0;
856
824 dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", 857 dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n",
825 filp->f_path.dentry->d_parent->d_name.name, 858 filp->f_path.dentry->d_parent->d_name.name,
826 filp->f_path.dentry->d_name.name, 859 filp->f_path.dentry->d_name.name,
@@ -829,14 +862,17 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
829 if (!(fl->fl_flags & FL_FLOCK)) 862 if (!(fl->fl_flags & FL_FLOCK))
830 return -ENOLCK; 863 return -ENOLCK;
831 864
865 if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
866 is_local = 1;
867
832 /* We're simulating flock() locks using posix locks on the server */ 868 /* We're simulating flock() locks using posix locks on the server */
833 fl->fl_owner = (fl_owner_t)filp; 869 fl->fl_owner = (fl_owner_t)filp;
834 fl->fl_start = 0; 870 fl->fl_start = 0;
835 fl->fl_end = OFFSET_MAX; 871 fl->fl_end = OFFSET_MAX;
836 872
837 if (fl->fl_type == F_UNLCK) 873 if (fl->fl_type == F_UNLCK)
838 return do_unlk(filp, cmd, fl); 874 return do_unlk(filp, cmd, fl, is_local);
839 return do_setlk(filp, cmd, fl); 875 return do_setlk(filp, cmd, fl, is_local);
840} 876}
841 877
842/* 878/*
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 21a84d45916f..dec47ed8b6b9 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -34,6 +34,212 @@
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36 36
37#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
38
39#include <linux/slab.h>
40#include <linux/cred.h>
41#include <linux/nfs_idmap.h>
42#include <linux/keyctl.h>
43#include <linux/key-type.h>
44#include <linux/rcupdate.h>
45#include <linux/kernel.h>
46#include <linux/err.h>
47
48#include <keys/user-type.h>
49
50#define NFS_UINT_MAXLEN 11
51
52const struct cred *id_resolver_cache;
53
54struct key_type key_type_id_resolver = {
55 .name = "id_resolver",
56 .instantiate = user_instantiate,
57 .match = user_match,
58 .revoke = user_revoke,
59 .destroy = user_destroy,
60 .describe = user_describe,
61 .read = user_read,
62};
63
64int nfs_idmap_init(void)
65{
66 struct cred *cred;
67 struct key *keyring;
68 int ret = 0;
69
70 printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name);
71
72 cred = prepare_kernel_cred(NULL);
73 if (!cred)
74 return -ENOMEM;
75
76 keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred,
77 (KEY_POS_ALL & ~KEY_POS_SETATTR) |
78 KEY_USR_VIEW | KEY_USR_READ,
79 KEY_ALLOC_NOT_IN_QUOTA);
80 if (IS_ERR(keyring)) {
81 ret = PTR_ERR(keyring);
82 goto failed_put_cred;
83 }
84
85 ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL);
86 if (ret < 0)
87 goto failed_put_key;
88
89 ret = register_key_type(&key_type_id_resolver);
90 if (ret < 0)
91 goto failed_put_key;
92
93 cred->thread_keyring = keyring;
94 cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
95 id_resolver_cache = cred;
96 return 0;
97
98failed_put_key:
99 key_put(keyring);
100failed_put_cred:
101 put_cred(cred);
102 return ret;
103}
104
105void nfs_idmap_quit(void)
106{
107 key_revoke(id_resolver_cache->thread_keyring);
108 unregister_key_type(&key_type_id_resolver);
109 put_cred(id_resolver_cache);
110}
111
112/*
113 * Assemble the description to pass to request_key()
114 * This function will allocate a new string and update dest to point
115 * at it. The caller is responsible for freeing dest.
116 *
117 * On error 0 is returned. Otherwise, the length of dest is returned.
118 */
119static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
120 const char *type, size_t typelen, char **desc)
121{
122 char *cp;
123 size_t desclen = typelen + namelen + 2;
124
125 *desc = kmalloc(desclen, GFP_KERNEL);
126 if (!desc)
127 return -ENOMEM;
128
129 cp = *desc;
130 memcpy(cp, type, typelen);
131 cp += typelen;
132 *cp++ = ':';
133
134 memcpy(cp, name, namelen);
135 cp += namelen;
136 *cp = '\0';
137 return desclen;
138}
139
140static ssize_t nfs_idmap_request_key(const char *name, size_t namelen,
141 const char *type, void *data, size_t data_size)
142{
143 const struct cred *saved_cred;
144 struct key *rkey;
145 char *desc;
146 struct user_key_payload *payload;
147 ssize_t ret;
148
149 ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc);
150 if (ret <= 0)
151 goto out;
152
153 saved_cred = override_creds(id_resolver_cache);
154 rkey = request_key(&key_type_id_resolver, desc, "");
155 revert_creds(saved_cred);
156 kfree(desc);
157 if (IS_ERR(rkey)) {
158 ret = PTR_ERR(rkey);
159 goto out;
160 }
161
162 rcu_read_lock();
163 rkey->perm |= KEY_USR_VIEW;
164
165 ret = key_validate(rkey);
166 if (ret < 0)
167 goto out_up;
168
169 payload = rcu_dereference(rkey->payload.data);
170 if (IS_ERR_OR_NULL(payload)) {
171 ret = PTR_ERR(payload);
172 goto out_up;
173 }
174
175 ret = payload->datalen;
176 if (ret > 0 && ret <= data_size)
177 memcpy(data, payload->data, ret);
178 else
179 ret = -EINVAL;
180
181out_up:
182 rcu_read_unlock();
183 key_put(rkey);
184out:
185 return ret;
186}
187
188
189/* ID -> Name */
190static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen)
191{
192 char id_str[NFS_UINT_MAXLEN];
193 int id_len;
194 ssize_t ret;
195
196 id_len = snprintf(id_str, sizeof(id_str), "%u", id);
197 ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen);
198 if (ret < 0)
199 return -EINVAL;
200 return ret;
201}
202
203/* Name -> ID */
204static int nfs_idmap_lookup_id(const char *name, size_t namelen,
205 const char *type, __u32 *id)
206{
207 char id_str[NFS_UINT_MAXLEN];
208 long id_long;
209 ssize_t data_size;
210 int ret = 0;
211
212 data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN);
213 if (data_size <= 0) {
214 ret = -EINVAL;
215 } else {
216 ret = strict_strtol(id_str, 10, &id_long);
217 *id = (__u32)id_long;
218 }
219 return ret;
220}
221
222int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
223{
224 return nfs_idmap_lookup_id(name, namelen, "uid", uid);
225}
226
227int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid)
228{
229 return nfs_idmap_lookup_id(name, namelen, "gid", gid);
230}
231
232int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
233{
234 return nfs_idmap_lookup_name(uid, "user", buf, buflen);
235}
236int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen)
237{
238 return nfs_idmap_lookup_name(gid, "group", buf, buflen);
239}
240
241#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
242
37#include <linux/module.h> 243#include <linux/module.h>
38#include <linux/mutex.h> 244#include <linux/mutex.h>
39#include <linux/init.h> 245#include <linux/init.h>
@@ -503,16 +709,17 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele
503 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 709 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
504} 710}
505 711
506int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) 712int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
507{ 713{
508 struct idmap *idmap = clp->cl_idmap; 714 struct idmap *idmap = clp->cl_idmap;
509 715
510 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 716 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
511} 717}
512int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) 718int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
513{ 719{
514 struct idmap *idmap = clp->cl_idmap; 720 struct idmap *idmap = clp->cl_idmap;
515 721
516 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); 722 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
517} 723}
518 724
725#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7d2d6c72aa78..314f57164602 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -48,6 +48,7 @@
48#include "internal.h" 48#include "internal.h"
49#include "fscache.h" 49#include "fscache.h"
50#include "dns_resolve.h" 50#include "dns_resolve.h"
51#include "pnfs.h"
51 52
52#define NFSDBG_FACILITY NFSDBG_VFS 53#define NFSDBG_FACILITY NFSDBG_VFS
53 54
@@ -234,9 +235,6 @@ nfs_init_locked(struct inode *inode, void *opaque)
234 return 0; 235 return 0;
235} 236}
236 237
237/* Don't use READDIRPLUS on directories that we believe are too large */
238#define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE)
239
240/* 238/*
241 * This is our front-end to iget that looks up inodes by file handle 239 * This is our front-end to iget that looks up inodes by file handle
242 * instead of inode number. 240 * instead of inode number.
@@ -291,8 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
291 } else if (S_ISDIR(inode->i_mode)) { 289 } else if (S_ISDIR(inode->i_mode)) {
292 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; 290 inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
293 inode->i_fop = &nfs_dir_operations; 291 inode->i_fop = &nfs_dir_operations;
294 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) 292 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
295 && fattr->size <= NFS_LIMIT_READDIRPLUS)
296 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 293 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
297 /* Deal with crossing mountpoints */ 294 /* Deal with crossing mountpoints */
298 if ((fattr->valid & NFS_ATTR_FATTR_FSID) 295 if ((fattr->valid & NFS_ATTR_FATTR_FSID)
@@ -623,7 +620,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
623 nfs_revalidate_inode(server, inode); 620 nfs_revalidate_inode(server, inode);
624} 621}
625 622
626static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) 623struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode)
627{ 624{
628 struct nfs_open_context *ctx; 625 struct nfs_open_context *ctx;
629 626
@@ -633,11 +630,13 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct
633 path_get(&ctx->path); 630 path_get(&ctx->path);
634 ctx->cred = get_rpccred(cred); 631 ctx->cred = get_rpccred(cred);
635 ctx->state = NULL; 632 ctx->state = NULL;
633 ctx->mode = f_mode;
636 ctx->flags = 0; 634 ctx->flags = 0;
637 ctx->error = 0; 635 ctx->error = 0;
638 ctx->dir_cookie = 0; 636 ctx->dir_cookie = 0;
639 nfs_init_lock_context(&ctx->lock_context); 637 nfs_init_lock_context(&ctx->lock_context);
640 ctx->lock_context.open_context = ctx; 638 ctx->lock_context.open_context = ctx;
639 INIT_LIST_HEAD(&ctx->list);
641 } 640 }
642 return ctx; 641 return ctx;
643} 642}
@@ -653,11 +652,15 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
653{ 652{
654 struct inode *inode = ctx->path.dentry->d_inode; 653 struct inode *inode = ctx->path.dentry->d_inode;
655 654
656 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) 655 if (!list_empty(&ctx->list)) {
656 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
657 return;
658 list_del(&ctx->list);
659 spin_unlock(&inode->i_lock);
660 } else if (!atomic_dec_and_test(&ctx->lock_context.count))
657 return; 661 return;
658 list_del(&ctx->list); 662 if (inode != NULL)
659 spin_unlock(&inode->i_lock); 663 NFS_PROTO(inode)->close_context(ctx, is_sync);
660 NFS_PROTO(inode)->close_context(ctx, is_sync);
661 if (ctx->cred != NULL) 664 if (ctx->cred != NULL)
662 put_rpccred(ctx->cred); 665 put_rpccred(ctx->cred);
663 path_put(&ctx->path); 666 path_put(&ctx->path);
@@ -673,7 +676,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
673 * Ensure that mmap has a recent RPC credential for use when writing out 676 * Ensure that mmap has a recent RPC credential for use when writing out
674 * shared pages 677 * shared pages
675 */ 678 */
676static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) 679void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
677{ 680{
678 struct inode *inode = filp->f_path.dentry->d_inode; 681 struct inode *inode = filp->f_path.dentry->d_inode;
679 struct nfs_inode *nfsi = NFS_I(inode); 682 struct nfs_inode *nfsi = NFS_I(inode);
@@ -730,11 +733,10 @@ int nfs_open(struct inode *inode, struct file *filp)
730 cred = rpc_lookup_cred(); 733 cred = rpc_lookup_cred();
731 if (IS_ERR(cred)) 734 if (IS_ERR(cred))
732 return PTR_ERR(cred); 735 return PTR_ERR(cred);
733 ctx = alloc_nfs_open_context(&filp->f_path, cred); 736 ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode);
734 put_rpccred(cred); 737 put_rpccred(cred);
735 if (ctx == NULL) 738 if (ctx == NULL)
736 return -ENOMEM; 739 return -ENOMEM;
737 ctx->mode = filp->f_mode;
738 nfs_file_set_open_context(filp, ctx); 740 nfs_file_set_open_context(filp, ctx);
739 put_nfs_open_context(ctx); 741 put_nfs_open_context(ctx);
740 nfs_fscache_set_inode_cookie(inode, filp); 742 nfs_fscache_set_inode_cookie(inode, filp);
@@ -1409,6 +1411,7 @@ void nfs4_evict_inode(struct inode *inode)
1409{ 1411{
1410 truncate_inode_pages(&inode->i_data, 0); 1412 truncate_inode_pages(&inode->i_data, 0);
1411 end_writeback(inode); 1413 end_writeback(inode);
1414 pnfs_destroy_layout(NFS_I(inode));
1412 /* If we are holding a delegation, return it! */ 1415 /* If we are holding a delegation, return it! */
1413 nfs_inode_return_delegation_noreclaim(inode); 1416 nfs_inode_return_delegation_noreclaim(inode);
1414 /* First call standard NFS clear_inode() code */ 1417 /* First call standard NFS clear_inode() code */
@@ -1446,6 +1449,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1446 nfsi->delegation = NULL; 1449 nfsi->delegation = NULL;
1447 nfsi->delegation_state = 0; 1450 nfsi->delegation_state = 0;
1448 init_rwsem(&nfsi->rwsem); 1451 init_rwsem(&nfsi->rwsem);
1452 nfsi->layout = NULL;
1449#endif 1453#endif
1450} 1454}
1451 1455
@@ -1493,7 +1497,7 @@ static int nfsiod_start(void)
1493{ 1497{
1494 struct workqueue_struct *wq; 1498 struct workqueue_struct *wq;
1495 dprintk("RPC: creating workqueue nfsiod\n"); 1499 dprintk("RPC: creating workqueue nfsiod\n");
1496 wq = create_singlethread_workqueue("nfsiod"); 1500 wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0);
1497 if (wq == NULL) 1501 if (wq == NULL)
1498 return -ENOMEM; 1502 return -ENOMEM;
1499 nfsiod_workqueue = wq; 1503 nfsiod_workqueue = wq;
@@ -1521,6 +1525,10 @@ static int __init init_nfs_fs(void)
1521{ 1525{
1522 int err; 1526 int err;
1523 1527
1528 err = nfs_idmap_init();
1529 if (err < 0)
1530 goto out9;
1531
1524 err = nfs_dns_resolver_init(); 1532 err = nfs_dns_resolver_init();
1525 if (err < 0) 1533 if (err < 0)
1526 goto out8; 1534 goto out8;
@@ -1585,6 +1593,8 @@ out6:
1585out7: 1593out7:
1586 nfs_dns_resolver_destroy(); 1594 nfs_dns_resolver_destroy();
1587out8: 1595out8:
1596 nfs_idmap_quit();
1597out9:
1588 return err; 1598 return err;
1589} 1599}
1590 1600
@@ -1597,6 +1607,7 @@ static void __exit exit_nfs_fs(void)
1597 nfs_destroy_nfspagecache(); 1607 nfs_destroy_nfspagecache();
1598 nfs_fscache_unregister(); 1608 nfs_fscache_unregister();
1599 nfs_dns_resolver_destroy(); 1609 nfs_dns_resolver_destroy();
1610 nfs_idmap_quit();
1600#ifdef CONFIG_PROC_FS 1611#ifdef CONFIG_PROC_FS
1601 rpc_proc_unregister("nfs"); 1612 rpc_proc_unregister("nfs");
1602#endif 1613#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c961bc92c107..db08ff3ff454 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -63,6 +63,12 @@ struct nfs_clone_mount {
63#define NFS_UNSPEC_PORT (-1) 63#define NFS_UNSPEC_PORT (-1)
64 64
65/* 65/*
66 * Maximum number of pages that readdir can use for creating
67 * a vmapped array of pages.
68 */
69#define NFS_MAX_READDIR_PAGES 8
70
71/*
66 * In-kernel mount arguments 72 * In-kernel mount arguments
67 */ 73 */
68struct nfs_parsed_mount_data { 74struct nfs_parsed_mount_data {
@@ -181,15 +187,15 @@ extern void nfs_destroy_directcache(void);
181/* nfs2xdr.c */ 187/* nfs2xdr.c */
182extern int nfs_stat_to_errno(int); 188extern int nfs_stat_to_errno(int);
183extern struct rpc_procinfo nfs_procedures[]; 189extern struct rpc_procinfo nfs_procedures[];
184extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); 190extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
185 191
186/* nfs3xdr.c */ 192/* nfs3xdr.c */
187extern struct rpc_procinfo nfs3_procedures[]; 193extern struct rpc_procinfo nfs3_procedures[];
188extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); 194extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
189 195
190/* nfs4xdr.c */ 196/* nfs4xdr.c */
191#ifdef CONFIG_NFS_V4 197#ifdef CONFIG_NFS_V4
192extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); 198extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
193#endif 199#endif
194#ifdef CONFIG_NFS_V4_1 200#ifdef CONFIG_NFS_V4_1
195extern const u32 nfs41_maxread_overhead; 201extern const u32 nfs41_maxread_overhead;
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 59047f8d7d72..eceafe74f473 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info)
153 .rpc_resp = &result, 153 .rpc_resp = &result,
154 }; 154 };
155 struct rpc_create_args args = { 155 struct rpc_create_args args = {
156 .net = &init_net,
156 .protocol = info->protocol, 157 .protocol = info->protocol,
157 .address = info->sap, 158 .address = info->sap,
158 .addrsize = info->salen, 159 .addrsize = info->salen,
@@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info)
224 .to_retries = 2, 225 .to_retries = 2,
225 }; 226 };
226 struct rpc_create_args args = { 227 struct rpc_create_args args = {
228 .net = &init_net,
227 .protocol = IPPROTO_UDP, 229 .protocol = IPPROTO_UDP,
228 .address = info->sap, 230 .address = info->sap,
229 .addrsize = info->salen, 231 .addrsize = info->salen,
@@ -436,7 +438,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res)
436 438
437 for (i = 0; i < entries; i++) { 439 for (i = 0; i < entries; i++) {
438 flavors[i] = ntohl(*p++); 440 flavors[i] = ntohl(*p++);
439 dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]); 441 dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]);
440 } 442 }
441 *count = i; 443 *count = i;
442 444
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index db8846a0e82e..e6bf45710cc7 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -337,10 +337,10 @@ nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args)
337static int 337static int
338nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) 338nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
339{ 339{
340 p = xdr_encode_fhandle(p, args->fromfh); 340 p = xdr_encode_fhandle(p, args->old_dir);
341 p = xdr_encode_array(p, args->fromname, args->fromlen); 341 p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
342 p = xdr_encode_fhandle(p, args->tofh); 342 p = xdr_encode_fhandle(p, args->new_dir);
343 p = xdr_encode_array(p, args->toname, args->tolen); 343 p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
344 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 344 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
345 return 0; 345 return 0;
346} 346}
@@ -423,9 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
423 struct page **page; 423 struct page **page;
424 size_t hdrlen; 424 size_t hdrlen;
425 unsigned int pglen, recvd; 425 unsigned int pglen, recvd;
426 u32 len;
427 int status, nr = 0; 426 int status, nr = 0;
428 __be32 *end, *entry, *kaddr;
429 427
430 if ((status = ntohl(*p++))) 428 if ((status = ntohl(*p++)))
431 return nfs_stat_to_errno(status); 429 return nfs_stat_to_errno(status);
@@ -445,80 +443,59 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
445 if (pglen > recvd) 443 if (pglen > recvd)
446 pglen = recvd; 444 pglen = recvd;
447 page = rcvbuf->pages; 445 page = rcvbuf->pages;
448 kaddr = p = kmap_atomic(*page, KM_USER0);
449 end = (__be32 *)((char *)p + pglen);
450 entry = p;
451
452 /* Make sure the packet actually has a value_follows and EOF entry */
453 if ((entry + 1) > end)
454 goto short_pkt;
455
456 for (; *p++; nr++) {
457 if (p + 2 > end)
458 goto short_pkt;
459 p++; /* fileid */
460 len = ntohl(*p++);
461 p += XDR_QUADLEN(len) + 1; /* name plus cookie */
462 if (len > NFS2_MAXNAMLEN) {
463 dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
464 len);
465 goto err_unmap;
466 }
467 if (p + 2 > end)
468 goto short_pkt;
469 entry = p;
470 }
471
472 /*
473 * Apparently some server sends responses that are a valid size, but
474 * contain no entries, and have value_follows==0 and EOF==0. For
475 * those, just set the EOF marker.
476 */
477 if (!nr && entry[1] == 0) {
478 dprintk("NFS: readdir reply truncated!\n");
479 entry[1] = 1;
480 }
481 out:
482 kunmap_atomic(kaddr, KM_USER0);
483 return nr; 446 return nr;
484 short_pkt: 447}
485 /* 448
486 * When we get a short packet there are 2 possibilities. We can 449static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
487 * return an error, or fix up the response to look like a valid 450{
488 * response and return what we have so far. If there are no 451 dprintk("nfs: %s: prematurely hit end of receive buffer. "
489 * entries and the packet was short, then return -EIO. If there 452 "Remaining buffer length is %tu words.\n",
490 * are valid entries in the response, return them and pretend that 453 func, xdr->end - xdr->p);
491 * the call was successful, but incomplete. The caller can retry the
492 * readdir starting at the last cookie.
493 */
494 entry[0] = entry[1] = 0;
495 if (!nr)
496 nr = -errno_NFSERR_IO;
497 goto out;
498err_unmap:
499 nr = -errno_NFSERR_IO;
500 goto out;
501} 454}
502 455
503__be32 * 456__be32 *
504nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 457nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
505{ 458{
506 if (!*p++) { 459 __be32 *p;
507 if (!*p) 460 p = xdr_inline_decode(xdr, 4);
461 if (unlikely(!p))
462 goto out_overflow;
463 if (!ntohl(*p++)) {
464 p = xdr_inline_decode(xdr, 4);
465 if (unlikely(!p))
466 goto out_overflow;
467 if (!ntohl(*p++))
508 return ERR_PTR(-EAGAIN); 468 return ERR_PTR(-EAGAIN);
509 entry->eof = 1; 469 entry->eof = 1;
510 return ERR_PTR(-EBADCOOKIE); 470 return ERR_PTR(-EBADCOOKIE);
511 } 471 }
512 472
473 p = xdr_inline_decode(xdr, 8);
474 if (unlikely(!p))
475 goto out_overflow;
476
513 entry->ino = ntohl(*p++); 477 entry->ino = ntohl(*p++);
514 entry->len = ntohl(*p++); 478 entry->len = ntohl(*p++);
479
480 p = xdr_inline_decode(xdr, entry->len + 4);
481 if (unlikely(!p))
482 goto out_overflow;
515 entry->name = (const char *) p; 483 entry->name = (const char *) p;
516 p += XDR_QUADLEN(entry->len); 484 p += XDR_QUADLEN(entry->len);
517 entry->prev_cookie = entry->cookie; 485 entry->prev_cookie = entry->cookie;
518 entry->cookie = ntohl(*p++); 486 entry->cookie = ntohl(*p++);
519 entry->eof = !p[0] && p[1]; 487
488 p = xdr_inline_peek(xdr, 8);
489 if (p != NULL)
490 entry->eof = !p[0] && p[1];
491 else
492 entry->eof = 0;
520 493
521 return p; 494 return p;
495
496out_overflow:
497 print_overflow_msg(__func__, xdr);
498 return ERR_PTR(-EIO);
522} 499}
523 500
524/* 501/*
@@ -596,7 +573,6 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
596 struct kvec *iov = rcvbuf->head; 573 struct kvec *iov = rcvbuf->head;
597 size_t hdrlen; 574 size_t hdrlen;
598 u32 len, recvd; 575 u32 len, recvd;
599 char *kaddr;
600 int status; 576 int status;
601 577
602 if ((status = ntohl(*p++))) 578 if ((status = ntohl(*p++)))
@@ -623,10 +599,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
623 return -EIO; 599 return -EIO;
624 } 600 }
625 601
626 /* NULL terminate the string we got */ 602 xdr_terminate_string(rcvbuf, len);
627 kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0);
628 kaddr[len+rcvbuf->page_base] = '\0';
629 kunmap_atomic(kaddr, KM_USER0);
630 return 0; 603 return 0;
631} 604}
632 605
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index fabb4f2849a1..ce939c062a52 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -313,7 +313,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data)
313 */ 313 */
314static int 314static int
315nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 315nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
316 int flags, struct nameidata *nd) 316 int flags, struct nfs_open_context *ctx)
317{ 317{
318 struct nfs3_createdata *data; 318 struct nfs3_createdata *data;
319 mode_t mode = sattr->ia_mode; 319 mode_t mode = sattr->ia_mode;
@@ -438,19 +438,38 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
438 return 1; 438 return 1;
439} 439}
440 440
441static void
442nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
443{
444 msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME];
445}
446
447static int
448nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
449 struct inode *new_dir)
450{
451 struct nfs_renameres *res;
452
453 if (nfs3_async_handle_jukebox(task, old_dir))
454 return 0;
455 res = task->tk_msg.rpc_resp;
456
457 nfs_post_op_update_inode(old_dir, res->old_fattr);
458 nfs_post_op_update_inode(new_dir, res->new_fattr);
459 return 1;
460}
461
441static int 462static int
442nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, 463nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
443 struct inode *new_dir, struct qstr *new_name) 464 struct inode *new_dir, struct qstr *new_name)
444{ 465{
445 struct nfs3_renameargs arg = { 466 struct nfs_renameargs arg = {
446 .fromfh = NFS_FH(old_dir), 467 .old_dir = NFS_FH(old_dir),
447 .fromname = old_name->name, 468 .old_name = old_name,
448 .fromlen = old_name->len, 469 .new_dir = NFS_FH(new_dir),
449 .tofh = NFS_FH(new_dir), 470 .new_name = new_name,
450 .toname = new_name->name,
451 .tolen = new_name->len
452 }; 471 };
453 struct nfs3_renameres res; 472 struct nfs_renameres res;
454 struct rpc_message msg = { 473 struct rpc_message msg = {
455 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], 474 .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME],
456 .rpc_argp = &arg, 475 .rpc_argp = &arg,
@@ -460,17 +479,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name,
460 479
461 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); 480 dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name);
462 481
463 res.fromattr = nfs_alloc_fattr(); 482 res.old_fattr = nfs_alloc_fattr();
464 res.toattr = nfs_alloc_fattr(); 483 res.new_fattr = nfs_alloc_fattr();
465 if (res.fromattr == NULL || res.toattr == NULL) 484 if (res.old_fattr == NULL || res.new_fattr == NULL)
466 goto out; 485 goto out;
467 486
468 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); 487 status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
469 nfs_post_op_update_inode(old_dir, res.fromattr); 488 nfs_post_op_update_inode(old_dir, res.old_fattr);
470 nfs_post_op_update_inode(new_dir, res.toattr); 489 nfs_post_op_update_inode(new_dir, res.new_fattr);
471out: 490out:
472 nfs_free_fattr(res.toattr); 491 nfs_free_fattr(res.old_fattr);
473 nfs_free_fattr(res.fromattr); 492 nfs_free_fattr(res.new_fattr);
474 dprintk("NFS reply rename: %d\n", status); 493 dprintk("NFS reply rename: %d\n", status);
475 return status; 494 return status;
476} 495}
@@ -611,7 +630,7 @@ out:
611 */ 630 */
612static int 631static int
613nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 632nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
614 u64 cookie, struct page *page, unsigned int count, int plus) 633 u64 cookie, struct page **pages, unsigned int count, int plus)
615{ 634{
616 struct inode *dir = dentry->d_inode; 635 struct inode *dir = dentry->d_inode;
617 __be32 *verf = NFS_COOKIEVERF(dir); 636 __be32 *verf = NFS_COOKIEVERF(dir);
@@ -621,7 +640,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
621 .verf = {verf[0], verf[1]}, 640 .verf = {verf[0], verf[1]},
622 .plus = plus, 641 .plus = plus,
623 .count = count, 642 .count = count,
624 .pages = &page 643 .pages = pages
625 }; 644 };
626 struct nfs3_readdirres res = { 645 struct nfs3_readdirres res = {
627 .verf = verf, 646 .verf = verf,
@@ -652,7 +671,8 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
652 671
653 nfs_free_fattr(res.dir_attr); 672 nfs_free_fattr(res.dir_attr);
654out: 673out:
655 dprintk("NFS reply readdir: %d\n", status); 674 dprintk("NFS reply readdir%s: %d\n",
675 plus? "plus" : "", status);
656 return status; 676 return status;
657} 677}
658 678
@@ -722,7 +742,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
722 dprintk("NFS call fsstat\n"); 742 dprintk("NFS call fsstat\n");
723 nfs_fattr_init(stat->fattr); 743 nfs_fattr_init(stat->fattr);
724 status = rpc_call_sync(server->client, &msg, 0); 744 status = rpc_call_sync(server->client, &msg, 0);
725 dprintk("NFS reply statfs: %d\n", status); 745 dprintk("NFS reply fsstat: %d\n", status);
726 return status; 746 return status;
727} 747}
728 748
@@ -844,6 +864,8 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
844 .unlink_setup = nfs3_proc_unlink_setup, 864 .unlink_setup = nfs3_proc_unlink_setup,
845 .unlink_done = nfs3_proc_unlink_done, 865 .unlink_done = nfs3_proc_unlink_done,
846 .rename = nfs3_proc_rename, 866 .rename = nfs3_proc_rename,
867 .rename_setup = nfs3_proc_rename_setup,
868 .rename_done = nfs3_proc_rename_done,
847 .link = nfs3_proc_link, 869 .link = nfs3_proc_link,
848 .symlink = nfs3_proc_symlink, 870 .symlink = nfs3_proc_symlink,
849 .mkdir = nfs3_proc_mkdir, 871 .mkdir = nfs3_proc_mkdir,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 9769704f8ce6..d9a5e832c257 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -100,6 +100,13 @@ static const umode_t nfs_type2fmt[] = {
100 [NF3FIFO] = S_IFIFO, 100 [NF3FIFO] = S_IFIFO,
101}; 101};
102 102
103static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
104{
105 dprintk("nfs: %s: prematurely hit end of receive buffer. "
106 "Remaining buffer length is %tu words.\n",
107 func, xdr->end - xdr->p);
108}
109
103/* 110/*
104 * Common NFS XDR functions as inlines 111 * Common NFS XDR functions as inlines
105 */ 112 */
@@ -119,6 +126,29 @@ xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh)
119 return NULL; 126 return NULL;
120} 127}
121 128
129static inline __be32 *
130xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh)
131{
132 __be32 *p;
133 p = xdr_inline_decode(xdr, 4);
134 if (unlikely(!p))
135 goto out_overflow;
136 fh->size = ntohl(*p++);
137
138 if (fh->size <= NFS3_FHSIZE) {
139 p = xdr_inline_decode(xdr, fh->size);
140 if (unlikely(!p))
141 goto out_overflow;
142 memcpy(fh->data, p, fh->size);
143 return p + XDR_QUADLEN(fh->size);
144 }
145 return NULL;
146
147out_overflow:
148 print_overflow_msg(__func__, xdr);
149 return ERR_PTR(-EIO);
150}
151
122/* 152/*
123 * Encode/decode time. 153 * Encode/decode time.
124 */ 154 */
@@ -241,6 +271,26 @@ xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr)
241} 271}
242 272
243static inline __be32 * 273static inline __be32 *
274xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr)
275{
276 __be32 *p;
277
278 p = xdr_inline_decode(xdr, 4);
279 if (unlikely(!p))
280 goto out_overflow;
281 if (ntohl(*p++)) {
282 p = xdr_inline_decode(xdr, 84);
283 if (unlikely(!p))
284 goto out_overflow;
285 p = xdr_decode_fattr(p, fattr);
286 }
287 return p;
288out_overflow:
289 print_overflow_msg(__func__, xdr);
290 return ERR_PTR(-EIO);
291}
292
293static inline __be32 *
244xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr) 294xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr)
245{ 295{
246 if (*p++) 296 if (*p++)
@@ -442,12 +492,12 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args)
442 * Encode RENAME arguments 492 * Encode RENAME arguments
443 */ 493 */
444static int 494static int
445nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs3_renameargs *args) 495nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args)
446{ 496{
447 p = xdr_encode_fhandle(p, args->fromfh); 497 p = xdr_encode_fhandle(p, args->old_dir);
448 p = xdr_encode_array(p, args->fromname, args->fromlen); 498 p = xdr_encode_array(p, args->old_name->name, args->old_name->len);
449 p = xdr_encode_fhandle(p, args->tofh); 499 p = xdr_encode_fhandle(p, args->new_dir);
450 p = xdr_encode_array(p, args->toname, args->tolen); 500 p = xdr_encode_array(p, args->new_name->name, args->new_name->len);
451 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); 501 req->rq_slen = xdr_adjust_iovec(req->rq_svec, p);
452 return 0; 502 return 0;
453} 503}
@@ -504,9 +554,8 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
504 struct kvec *iov = rcvbuf->head; 554 struct kvec *iov = rcvbuf->head;
505 struct page **page; 555 struct page **page;
506 size_t hdrlen; 556 size_t hdrlen;
507 u32 len, recvd, pglen; 557 u32 recvd, pglen;
508 int status, nr = 0; 558 int status, nr = 0;
509 __be32 *entry, *end, *kaddr;
510 559
511 status = ntohl(*p++); 560 status = ntohl(*p++);
512 /* Decode post_op_attrs */ 561 /* Decode post_op_attrs */
@@ -536,99 +585,38 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
536 if (pglen > recvd) 585 if (pglen > recvd)
537 pglen = recvd; 586 pglen = recvd;
538 page = rcvbuf->pages; 587 page = rcvbuf->pages;
539 kaddr = p = kmap_atomic(*page, KM_USER0);
540 end = (__be32 *)((char *)p + pglen);
541 entry = p;
542
543 /* Make sure the packet actually has a value_follows and EOF entry */
544 if ((entry + 1) > end)
545 goto short_pkt;
546
547 for (; *p++; nr++) {
548 if (p + 3 > end)
549 goto short_pkt;
550 p += 2; /* inode # */
551 len = ntohl(*p++); /* string length */
552 p += XDR_QUADLEN(len) + 2; /* name + cookie */
553 if (len > NFS3_MAXNAMLEN) {
554 dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
555 len);
556 goto err_unmap;
557 }
558 588
559 if (res->plus) {
560 /* post_op_attr */
561 if (p + 2 > end)
562 goto short_pkt;
563 if (*p++) {
564 p += 21;
565 if (p + 1 > end)
566 goto short_pkt;
567 }
568 /* post_op_fh3 */
569 if (*p++) {
570 if (p + 1 > end)
571 goto short_pkt;
572 len = ntohl(*p++);
573 if (len > NFS3_FHSIZE) {
574 dprintk("NFS: giant filehandle in "
575 "readdir (len 0x%x)!\n", len);
576 goto err_unmap;
577 }
578 p += XDR_QUADLEN(len);
579 }
580 }
581
582 if (p + 2 > end)
583 goto short_pkt;
584 entry = p;
585 }
586
587 /*
588 * Apparently some server sends responses that are a valid size, but
589 * contain no entries, and have value_follows==0 and EOF==0. For
590 * those, just set the EOF marker.
591 */
592 if (!nr && entry[1] == 0) {
593 dprintk("NFS: readdir reply truncated!\n");
594 entry[1] = 1;
595 }
596 out:
597 kunmap_atomic(kaddr, KM_USER0);
598 return nr; 589 return nr;
599 short_pkt:
600 /*
601 * When we get a short packet there are 2 possibilities. We can
602 * return an error, or fix up the response to look like a valid
603 * response and return what we have so far. If there are no
604 * entries and the packet was short, then return -EIO. If there
605 * are valid entries in the response, return them and pretend that
606 * the call was successful, but incomplete. The caller can retry the
607 * readdir starting at the last cookie.
608 */
609 entry[0] = entry[1] = 0;
610 if (!nr)
611 nr = -errno_NFSERR_IO;
612 goto out;
613err_unmap:
614 nr = -errno_NFSERR_IO;
615 goto out;
616} 590}
617 591
618__be32 * 592__be32 *
619nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 593nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus)
620{ 594{
595 __be32 *p;
621 struct nfs_entry old = *entry; 596 struct nfs_entry old = *entry;
622 597
623 if (!*p++) { 598 p = xdr_inline_decode(xdr, 4);
624 if (!*p) 599 if (unlikely(!p))
600 goto out_overflow;
601 if (!ntohl(*p++)) {
602 p = xdr_inline_decode(xdr, 4);
603 if (unlikely(!p))
604 goto out_overflow;
605 if (!ntohl(*p++))
625 return ERR_PTR(-EAGAIN); 606 return ERR_PTR(-EAGAIN);
626 entry->eof = 1; 607 entry->eof = 1;
627 return ERR_PTR(-EBADCOOKIE); 608 return ERR_PTR(-EBADCOOKIE);
628 } 609 }
629 610
611 p = xdr_inline_decode(xdr, 12);
612 if (unlikely(!p))
613 goto out_overflow;
630 p = xdr_decode_hyper(p, &entry->ino); 614 p = xdr_decode_hyper(p, &entry->ino);
631 entry->len = ntohl(*p++); 615 entry->len = ntohl(*p++);
616
617 p = xdr_inline_decode(xdr, entry->len + 8);
618 if (unlikely(!p))
619 goto out_overflow;
632 entry->name = (const char *) p; 620 entry->name = (const char *) p;
633 p += XDR_QUADLEN(entry->len); 621 p += XDR_QUADLEN(entry->len);
634 entry->prev_cookie = entry->cookie; 622 entry->prev_cookie = entry->cookie;
@@ -636,10 +624,17 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
636 624
637 if (plus) { 625 if (plus) {
638 entry->fattr->valid = 0; 626 entry->fattr->valid = 0;
639 p = xdr_decode_post_op_attr(p, entry->fattr); 627 p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
628 if (IS_ERR(p))
629 goto out_overflow_exit;
640 /* In fact, a post_op_fh3: */ 630 /* In fact, a post_op_fh3: */
631 p = xdr_inline_decode(xdr, 4);
632 if (unlikely(!p))
633 goto out_overflow;
641 if (*p++) { 634 if (*p++) {
642 p = xdr_decode_fhandle(p, entry->fh); 635 p = xdr_decode_fhandle_stream(xdr, entry->fh);
636 if (IS_ERR(p))
637 goto out_overflow_exit;
643 /* Ugh -- server reply was truncated */ 638 /* Ugh -- server reply was truncated */
644 if (p == NULL) { 639 if (p == NULL) {
645 dprintk("NFS: FH truncated\n"); 640 dprintk("NFS: FH truncated\n");
@@ -650,8 +645,18 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
650 memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); 645 memset((u8*)(entry->fh), 0, sizeof(*entry->fh));
651 } 646 }
652 647
653 entry->eof = !p[0] && p[1]; 648 p = xdr_inline_peek(xdr, 8);
649 if (p != NULL)
650 entry->eof = !p[0] && p[1];
651 else
652 entry->eof = 0;
653
654 return p; 654 return p;
655
656out_overflow:
657 print_overflow_msg(__func__, xdr);
658out_overflow_exit:
659 return ERR_PTR(-EIO);
655} 660}
656 661
657/* 662/*
@@ -824,7 +829,6 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
824 struct kvec *iov = rcvbuf->head; 829 struct kvec *iov = rcvbuf->head;
825 size_t hdrlen; 830 size_t hdrlen;
826 u32 len, recvd; 831 u32 len, recvd;
827 char *kaddr;
828 int status; 832 int status;
829 833
830 status = ntohl(*p++); 834 status = ntohl(*p++);
@@ -857,10 +861,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
857 return -EIO; 861 return -EIO;
858 } 862 }
859 863
860 /* NULL terminate the string we got */ 864 xdr_terminate_string(rcvbuf, len);
861 kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0);
862 kaddr[len+rcvbuf->page_base] = '\0';
863 kunmap_atomic(kaddr, KM_USER0);
864 return 0; 865 return 0;
865} 866}
866 867
@@ -970,14 +971,14 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
970 * Decode RENAME reply 971 * Decode RENAME reply
971 */ 972 */
972static int 973static int
973nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res) 974nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res)
974{ 975{
975 int status; 976 int status;
976 977
977 if ((status = ntohl(*p++)) != 0) 978 if ((status = ntohl(*p++)) != 0)
978 status = nfs_stat_to_errno(status); 979 status = nfs_stat_to_errno(status);
979 p = xdr_decode_wcc_data(p, res->fromattr); 980 p = xdr_decode_wcc_data(p, res->old_fattr);
980 p = xdr_decode_wcc_data(p, res->toattr); 981 p = xdr_decode_wcc_data(p, res->new_fattr);
981 return status; 982 return status;
982} 983}
983 984
@@ -1043,8 +1044,9 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
1043 res->wtmult = ntohl(*p++); 1044 res->wtmult = ntohl(*p++);
1044 res->dtpref = ntohl(*p++); 1045 res->dtpref = ntohl(*p++);
1045 p = xdr_decode_hyper(p, &res->maxfilesize); 1046 p = xdr_decode_hyper(p, &res->maxfilesize);
1047 p = xdr_decode_time3(p, &res->time_delta);
1046 1048
1047 /* ignore time_delta and properties */ 1049 /* ignore properties */
1048 res->lease_time = 0; 1050 res->lease_time = 0;
1049 return 0; 1051 return 0;
1050} 1052}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 311e15cc8af0..9fa496387fdf 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -242,8 +242,6 @@ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
242extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 242extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
243extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 243extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
244extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); 244extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
245extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
246extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
247extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 245extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
248extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 246extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
249 struct nfs4_fs_locations *fs_locations, struct page *page); 247 struct nfs4_fs_locations *fs_locations, struct page *page);
@@ -333,7 +331,7 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid);
333extern const nfs4_stateid zero_stateid; 331extern const nfs4_stateid zero_stateid;
334 332
335/* nfs4xdr.c */ 333/* nfs4xdr.c */
336extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); 334extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
337extern struct rpc_procinfo nfs4_procedures[]; 335extern struct rpc_procinfo nfs4_procedures[];
338 336
339struct nfs4_mount_data; 337struct nfs4_mount_data;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
new file mode 100644
index 000000000000..2e92f0d8d654
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.c
@@ -0,0 +1,280 @@
1/*
2 * Module for the pnfs nfs4 file layout driver.
3 * Defines all I/O and Policy interface operations, plus code
4 * to register itself with the pNFS client.
5 *
6 * Copyright (c) 2002
7 * The Regents of the University of Michigan
8 * All Rights Reserved
9 *
10 * Dean Hildebrand <dhildebz@umich.edu>
11 *
12 * Permission is granted to use, copy, create derivative works, and
13 * redistribute this software and such derivative works for any purpose,
14 * so long as the name of the University of Michigan is not used in
15 * any advertising or publicity pertaining to the use or distribution
16 * of this software without specific, written prior authorization. If
17 * the above copyright notice or any other identification of the
18 * University of Michigan is included in any copy of any portion of
19 * this software, then the disclaimer below must also be included.
20 *
21 * This software is provided as is, without representation or warranty
22 * of any kind either express or implied, including without limitation
23 * the implied warranties of merchantability, fitness for a particular
24 * purpose, or noninfringement. The Regents of the University of
25 * Michigan shall not be liable for any damages, including special,
26 * indirect, incidental, or consequential damages, with respect to any
27 * claim arising out of or in connection with the use of the software,
28 * even if it has been or is hereafter advised of the possibility of
29 * such damages.
30 */
31
32#include <linux/nfs_fs.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
41MODULE_DESCRIPTION("The NFSv4 file layout driver");
42
43static int
44filelayout_set_layoutdriver(struct nfs_server *nfss)
45{
46 int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client,
47 nfs4_fl_free_deviceid_callback);
48 if (status) {
49 printk(KERN_WARNING "%s: deviceid cache could not be "
50 "initialized\n", __func__);
51 return status;
52 }
53 dprintk("%s: deviceid cache has been initialized successfully\n",
54 __func__);
55 return 0;
56}
57
58/* Clear out the layout by destroying its device list */
59static int
60filelayout_clear_layoutdriver(struct nfs_server *nfss)
61{
62 dprintk("--> %s\n", __func__);
63
64 if (nfss->nfs_client->cl_devid_cache)
65 pnfs_put_deviceid_cache(nfss->nfs_client);
66 return 0;
67}
68
69/*
70 * filelayout_check_layout()
71 *
72 * Make sure layout segment parameters are sane WRT the device.
73 * At this point no generic layer initialization of the lseg has occurred,
74 * and nothing has been added to the layout_hdr cache.
75 *
76 */
77static int
78filelayout_check_layout(struct pnfs_layout_hdr *lo,
79 struct nfs4_filelayout_segment *fl,
80 struct nfs4_layoutget_res *lgr,
81 struct nfs4_deviceid *id)
82{
83 struct nfs4_file_layout_dsaddr *dsaddr;
84 int status = -EINVAL;
85 struct nfs_server *nfss = NFS_SERVER(lo->inode);
86
87 dprintk("--> %s\n", __func__);
88
89 if (fl->pattern_offset > lgr->range.offset) {
90 dprintk("%s pattern_offset %lld to large\n",
91 __func__, fl->pattern_offset);
92 goto out;
93 }
94
95 if (fl->stripe_unit % PAGE_SIZE) {
96 dprintk("%s Stripe unit (%u) not page aligned\n",
97 __func__, fl->stripe_unit);
98 goto out;
99 }
100
101 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
103 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->inode, id);
105 if (dsaddr == NULL)
106 goto out;
107 }
108 fl->dsaddr = dsaddr;
109
110 if (fl->first_stripe_index < 0 ||
111 fl->first_stripe_index >= dsaddr->stripe_count) {
112 dprintk("%s Bad first_stripe_index %d\n",
113 __func__, fl->first_stripe_index);
114 goto out_put;
115 }
116
117 if ((fl->stripe_type == STRIPE_SPARSE &&
118 fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) ||
119 (fl->stripe_type == STRIPE_DENSE &&
120 fl->num_fh != dsaddr->stripe_count)) {
121 dprintk("%s num_fh %u not valid for given packing\n",
122 __func__, fl->num_fh);
123 goto out_put;
124 }
125
126 if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
127 dprintk("%s Stripe unit (%u) not aligned with rsize %u "
128 "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
129 nfss->wsize);
130 }
131
132 status = 0;
133out:
134 dprintk("--> %s returns %d\n", __func__, status);
135 return status;
136out_put:
137 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid);
138 goto out;
139}
140
141static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
142{
143 int i;
144
145 for (i = 0; i < fl->num_fh; i++) {
146 if (!fl->fh_array[i])
147 break;
148 kfree(fl->fh_array[i]);
149 }
150 kfree(fl->fh_array);
151 fl->fh_array = NULL;
152}
153
/*
 * Release a file layout segment: first its filehandle array, then the
 * segment structure itself.  Does not touch fl->dsaddr.
 */
static void
_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
{
	filelayout_free_fh_array(fl);
	kfree(fl);
}
160
161static int
162filelayout_decode_layout(struct pnfs_layout_hdr *flo,
163 struct nfs4_filelayout_segment *fl,
164 struct nfs4_layoutget_res *lgr,
165 struct nfs4_deviceid *id)
166{
167 uint32_t *p = (uint32_t *)lgr->layout.buf;
168 uint32_t nfl_util;
169 int i;
170
171 dprintk("%s: set_layout_map Begin\n", __func__);
172
173 memcpy(id, p, sizeof(*id));
174 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
175 print_deviceid(id);
176
177 nfl_util = be32_to_cpup(p++);
178 if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
179 fl->commit_through_mds = 1;
180 if (nfl_util & NFL4_UFLG_DENSE)
181 fl->stripe_type = STRIPE_DENSE;
182 else
183 fl->stripe_type = STRIPE_SPARSE;
184 fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
185
186 fl->first_stripe_index = be32_to_cpup(p++);
187 p = xdr_decode_hyper(p, &fl->pattern_offset);
188 fl->num_fh = be32_to_cpup(p++);
189
190 dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n",
191 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
192 fl->pattern_offset);
193
194 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
195 GFP_KERNEL);
196 if (!fl->fh_array)
197 return -ENOMEM;
198
199 for (i = 0; i < fl->num_fh; i++) {
200 /* Do we want to use a mempool here? */
201 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
202 if (!fl->fh_array[i]) {
203 filelayout_free_fh_array(fl);
204 return -ENOMEM;
205 }
206 fl->fh_array[i]->size = be32_to_cpup(p++);
207 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
208 printk(KERN_ERR "Too big fh %d received %d\n",
209 i, fl->fh_array[i]->size);
210 filelayout_free_fh_array(fl);
211 return -EIO;
212 }
213 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
214 p += XDR_QUADLEN(fl->fh_array[i]->size);
215 dprintk("DEBUG: %s: fh len %d\n", __func__,
216 fl->fh_array[i]->size);
217 }
218
219 return 0;
220}
221
222static struct pnfs_layout_segment *
223filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
224 struct nfs4_layoutget_res *lgr)
225{
226 struct nfs4_filelayout_segment *fl;
227 int rc;
228 struct nfs4_deviceid id;
229
230 dprintk("--> %s\n", __func__);
231 fl = kzalloc(sizeof(*fl), GFP_KERNEL);
232 if (!fl)
233 return NULL;
234
235 rc = filelayout_decode_layout(layoutid, fl, lgr, &id);
236 if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) {
237 _filelayout_free_lseg(fl);
238 return NULL;
239 }
240 return &fl->generic_hdr;
241}
242
243static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{
246 struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248
249 dprintk("--> %s\n", __func__);
250 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache,
251 &fl->dsaddr->deviceid);
252 _filelayout_free_lseg(fl);
253}
254
255static struct pnfs_layoutdriver_type filelayout_type = {
256 .id = LAYOUT_NFSV4_1_FILES,
257 .name = "LAYOUT_NFSV4_1_FILES",
258 .owner = THIS_MODULE,
259 .set_layoutdriver = filelayout_set_layoutdriver,
260 .clear_layoutdriver = filelayout_clear_layoutdriver,
261 .alloc_lseg = filelayout_alloc_lseg,
262 .free_lseg = filelayout_free_lseg,
263};
264
265static int __init nfs4filelayout_init(void)
266{
267 printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n",
268 __func__);
269 return pnfs_register_layoutdriver(&filelayout_type);
270}
271
272static void __exit nfs4filelayout_exit(void)
273{
274 printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n",
275 __func__);
276 pnfs_unregister_layoutdriver(&filelayout_type);
277}
278
279module_init(nfs4filelayout_init);
280module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
new file mode 100644
index 000000000000..bbf60dd2ab9d
--- /dev/null
+++ b/fs/nfs/nfs4filelayout.h
@@ -0,0 +1,94 @@
1/*
2 * NFSv4 file layout driver data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_NFS4FILELAYOUT_H
31#define FS_NFS_NFS4FILELAYOUT_H
32
33#include "pnfs.h"
34
35/*
36 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256
39 * RFC 5661 multipath_list4 structures.
40 */
41#define NFS4_PNFS_MAX_STRIPE_CNT 4096
42#define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */
43
44enum stripetype4 {
45 STRIPE_SPARSE = 1,
46 STRIPE_DENSE = 2
47};
48
49/* Individual ip address */
50struct nfs4_pnfs_ds {
51 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
52 u32 ds_ip_addr;
53 u32 ds_port;
54 struct nfs_client *ds_clp;
55 atomic_t ds_count;
56};
57
58struct nfs4_file_layout_dsaddr {
59 struct pnfs_deviceid_node deviceid;
60 u32 stripe_count;
61 u8 *stripe_indices;
62 u32 ds_num;
63 struct nfs4_pnfs_ds *ds_list[1];
64};
65
66struct nfs4_filelayout_segment {
67 struct pnfs_layout_segment generic_hdr;
68 u32 stripe_type;
69 u32 commit_through_mds;
70 u32 stripe_unit;
71 u32 first_stripe_index;
72 u64 pattern_offset;
73 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
74 unsigned int num_fh;
75 struct nfs_fh **fh_array;
76};
77
78static inline struct nfs4_filelayout_segment *
79FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
80{
81 return container_of(lseg,
82 struct nfs4_filelayout_segment,
83 generic_hdr);
84}
85
86extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *);
87extern void print_ds(struct nfs4_pnfs_ds *ds);
88extern void print_deviceid(struct nfs4_deviceid *dev_id);
89extern struct nfs4_file_layout_dsaddr *
90nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id);
91struct nfs4_file_layout_dsaddr *
92get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
93
94#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
new file mode 100644
index 000000000000..51fe64ace55a
--- /dev/null
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -0,0 +1,448 @@
1/*
2 * Device operations for the pnfs nfs4 file layout driver.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 * Garth Goodson <Garth.Goodson@netapp.com>
10 *
11 * Permission is granted to use, copy, create derivative works, and
12 * redistribute this software and such derivative works for any purpose,
13 * so long as the name of the University of Michigan is not used in
14 * any advertising or publicity pertaining to the use or distribution
15 * of this software without specific, written prior authorization. If
16 * the above copyright notice or any other identification of the
17 * University of Michigan is included in any copy of any portion of
18 * this software, then the disclaimer below must also be included.
19 *
20 * This software is provided as is, without representation or warranty
21 * of any kind either express or implied, including without limitation
22 * the implied warranties of merchantability, fitness for a particular
23 * purpose, or noninfringement. The Regents of the University of
24 * Michigan shall not be liable for any damages, including special,
25 * indirect, incidental, or consequential damages, with respect to any
26 * claim arising out of or in connection with the use of the software,
27 * even if it has been or is hereafter advised of the possibility of
28 * such damages.
29 */
30
31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
39/*
40 * Data server cache
41 *
42 * Data servers can be mapped to different device ids.
43 * nfs4_pnfs_ds reference counting
44 * - set to 1 on allocation
45 * - incremented when a device id maps a data server already in the cache.
46 * - decremented when deviceid is removed from the cache.
47 */
/* Serializes lookups, insertions and removals on nfs4_data_server_cache. */
DEFINE_SPINLOCK(nfs4_ds_cache_lock);
/* All known data servers, linked through nfs4_pnfs_ds.ds_node. */
static LIST_HEAD(nfs4_data_server_cache);
50
51/* Debug routines */
52void
53print_ds(struct nfs4_pnfs_ds *ds)
54{
55 if (ds == NULL) {
56 printk("%s NULL device\n", __func__);
57 return;
58 }
59 printk(" ip_addr %x port %hu\n"
60 " ref count %d\n"
61 " client %p\n"
62 " cl_exchange_flags %x\n",
63 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
64 atomic_read(&ds->ds_count), ds->ds_clp,
65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66}
67
68void
69print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
70{
71 int i;
72
73 ifdebug(FACILITY) {
74 printk("%s dsaddr->ds_num %d\n", __func__,
75 dsaddr->ds_num);
76 for (i = 0; i < dsaddr->ds_num; i++)
77 print_ds(dsaddr->ds_list[i]);
78 }
79}
80
81void print_deviceid(struct nfs4_deviceid *id)
82{
83 u32 *p = (u32 *)id;
84
85 dprintk("%s: device id= [%x%x%x%x]\n", __func__,
86 p[0], p[1], p[2], p[3]);
87}
88
89/* nfs4_ds_cache_lock is held */
90static struct nfs4_pnfs_ds *
91_data_server_lookup_locked(u32 ip_addr, u32 port)
92{
93 struct nfs4_pnfs_ds *ds;
94
95 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
96 ntohl(ip_addr), ntohs(port));
97
98 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
99 if (ds->ds_ip_addr == ip_addr &&
100 ds->ds_port == port) {
101 return ds;
102 }
103 }
104 return NULL;
105}
106
107static void
108destroy_ds(struct nfs4_pnfs_ds *ds)
109{
110 dprintk("--> %s\n", __func__);
111 ifdebug(FACILITY)
112 print_ds(ds);
113
114 if (ds->ds_clp)
115 nfs_put_client(ds->ds_clp);
116 kfree(ds);
117}
118
119static void
120nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
121{
122 struct nfs4_pnfs_ds *ds;
123 int i;
124
125 print_deviceid(&dsaddr->deviceid.de_id);
126
127 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i];
129 if (ds != NULL) {
130 if (atomic_dec_and_lock(&ds->ds_count,
131 &nfs4_ds_cache_lock)) {
132 list_del_init(&ds->ds_node);
133 spin_unlock(&nfs4_ds_cache_lock);
134 destroy_ds(ds);
135 }
136 }
137 }
138 kfree(dsaddr->stripe_indices);
139 kfree(dsaddr);
140}
141
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
150
151static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{
154 struct nfs4_pnfs_ds *tmp_ds, *ds;
155
156 ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
157 if (!ds)
158 goto out;
159
160 spin_lock(&nfs4_ds_cache_lock);
161 tmp_ds = _data_server_lookup_locked(ip_addr, port);
162 if (tmp_ds == NULL) {
163 ds->ds_ip_addr = ip_addr;
164 ds->ds_port = port;
165 atomic_set(&ds->ds_count, 1);
166 INIT_LIST_HEAD(&ds->ds_node);
167 ds->ds_clp = NULL;
168 list_add(&ds->ds_node, &nfs4_data_server_cache);
169 dprintk("%s add new data server ip 0x%x\n", __func__,
170 ds->ds_ip_addr);
171 } else {
172 kfree(ds);
173 atomic_inc(&tmp_ds->ds_count);
174 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
175 __func__, tmp_ds->ds_ip_addr,
176 atomic_read(&tmp_ds->ds_count));
177 ds = tmp_ds;
178 }
179 spin_unlock(&nfs4_ds_cache_lock);
180out:
181 return ds;
182}
183
184/*
185 * Currently only support ipv4, and one multi-path address.
186 */
187static struct nfs4_pnfs_ds *
188decode_and_add_ds(__be32 **pp, struct inode *inode)
189{
190 struct nfs4_pnfs_ds *ds = NULL;
191 char *buf;
192 const char *ipend, *pstr;
193 u32 ip_addr, port;
194 int nlen, rlen, i;
195 int tmp[2];
196 __be32 *r_netid, *r_addr, *p = *pp;
197
198 /* r_netid */
199 nlen = be32_to_cpup(p++);
200 r_netid = p;
201 p += XDR_QUADLEN(nlen);
202
203 /* r_addr */
204 rlen = be32_to_cpup(p++);
205 r_addr = p;
206 p += XDR_QUADLEN(rlen);
207 *pp = p;
208
209 /* Check that netid is "tcp" */
210 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) {
211 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
212 goto out_err;
213 }
214
215 /* ipv6 length plus port is legal */
216 if (rlen > INET6_ADDRSTRLEN + 8) {
217 dprintk("%s Invalid address, length %d\n", __func__,
218 rlen);
219 goto out_err;
220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL);
222 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen);
224
225 /* replace the port dots with dashes for the in4_pton() delimiter*/
226 for (i = 0; i < 2; i++) {
227 char *res = strrchr(buf, '.');
228 *res = '-';
229 }
230
231 /* Currently only support ipv4 address */
232 if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
233 dprintk("%s: Only ipv4 addresses supported\n", __func__);
234 goto out_free;
235 }
236
237 /* port */
238 pstr = ipend;
239 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
240 port = htons((tmp[0] << 8) | (tmp[1]));
241
242 ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
243 dprintk("%s Decoded address and port %s\n", __func__, buf);
244out_free:
245 kfree(buf);
246out_err:
247 return ds;
248}
249
250/* Decode opaque device data and return the result */
251static struct nfs4_file_layout_dsaddr*
252decode_device(struct inode *ino, struct pnfs_device *pdev)
253{
254 int i, dummy;
255 u32 cnt, num;
256 u8 *indexp;
257 __be32 *p = (__be32 *)pdev->area, *indicesp;
258 struct nfs4_file_layout_dsaddr *dsaddr;
259
260 /* Get the stripe count (number of stripe index) */
261 cnt = be32_to_cpup(p++);
262 dprintk("%s stripe count %d\n", __func__, cnt);
263 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
264 printk(KERN_WARNING "%s: stripe count %d greater than "
265 "supported maximum %d\n", __func__,
266 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
267 goto out_err;
268 }
269
270 /* Check the multipath list count */
271 indicesp = p;
272 p += XDR_QUADLEN(cnt << 2);
273 num = be32_to_cpup(p++);
274 dprintk("%s ds_num %u\n", __func__, num);
275 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
276 printk(KERN_WARNING "%s: multipath count %d greater than "
277 "supported maximum %d\n", __func__,
278 num, NFS4_PNFS_MAX_MULTI_CNT);
279 goto out_err;
280 }
281 dsaddr = kzalloc(sizeof(*dsaddr) +
282 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
283 GFP_KERNEL);
284 if (!dsaddr)
285 goto out_err;
286
287 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
288 if (!dsaddr->stripe_indices)
289 goto out_err_free;
290
291 dsaddr->stripe_count = cnt;
292 dsaddr->ds_num = num;
293
294 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
295
296 /* Go back an read stripe indices */
297 p = indicesp;
298 indexp = &dsaddr->stripe_indices[0];
299 for (i = 0; i < dsaddr->stripe_count; i++) {
300 *indexp = be32_to_cpup(p++);
301 if (*indexp >= num)
302 goto out_err_free;
303 indexp++;
304 }
305 /* Skip already read multipath list count */
306 p++;
307
308 for (i = 0; i < dsaddr->ds_num; i++) {
309 int j;
310
311 dummy = be32_to_cpup(p++); /* multipath count */
312 if (dummy > 1) {
313 printk(KERN_WARNING
314 "%s: Multipath count %d not supported, "
315 "skipping all greater than 1\n", __func__,
316 dummy);
317 }
318 for (j = 0; j < dummy; j++) {
319 if (j == 0) {
320 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
321 if (dsaddr->ds_list[i] == NULL)
322 goto out_err_free;
323 } else {
324 u32 len;
325 /* skip extra multipath */
326 len = be32_to_cpup(p++);
327 p += XDR_QUADLEN(len);
328 len = be32_to_cpup(p++);
329 p += XDR_QUADLEN(len);
330 continue;
331 }
332 }
333 }
334 return dsaddr;
335
336out_err_free:
337 nfs4_fl_free_deviceid(dsaddr);
338out_err:
339 dprintk("%s ERROR: returning NULL\n", __func__);
340 return NULL;
341}
342
343/*
344 * Decode the opaque device specified in 'dev'
345 * and add it to the list of available devices.
346 * If the deviceid is already cached, nfs4_add_deviceid will return
347 * a pointer to the cached struct and throw away the new.
348 */
349static struct nfs4_file_layout_dsaddr*
350decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
351{
352 struct nfs4_file_layout_dsaddr *dsaddr;
353 struct pnfs_deviceid_node *d;
354
355 dsaddr = decode_device(inode, dev);
356 if (!dsaddr) {
357 printk(KERN_WARNING "%s: Could not decode or add device\n",
358 __func__);
359 return NULL;
360 }
361
362 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
363 &dsaddr->deviceid);
364
365 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
366}
367
368/*
369 * Retrieve the information for dev_id, add it to the list
370 * of available devices, and return it.
371 */
372struct nfs4_file_layout_dsaddr *
373get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
374{
375 struct pnfs_device *pdev = NULL;
376 u32 max_resp_sz;
377 int max_pages;
378 struct page **pages = NULL;
379 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
380 int rc, i;
381 struct nfs_server *server = NFS_SERVER(inode);
382
383 /*
384 * Use the session max response size as the basis for setting
385 * GETDEVICEINFO's maxcount
386 */
387 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
388 max_pages = max_resp_sz >> PAGE_SHIFT;
389 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
390 __func__, inode, max_resp_sz, max_pages);
391
392 pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
393 if (pdev == NULL)
394 return NULL;
395
396 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
397 if (pages == NULL) {
398 kfree(pdev);
399 return NULL;
400 }
401 for (i = 0; i < max_pages; i++) {
402 pages[i] = alloc_page(GFP_KERNEL);
403 if (!pages[i])
404 goto out_free;
405 }
406
407 /* set pdev->area */
408 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
409 if (!pdev->area)
410 goto out_free;
411
412 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
413 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
414 pdev->pages = pages;
415 pdev->pgbase = 0;
416 pdev->pglen = PAGE_SIZE * max_pages;
417 pdev->mincount = 0;
418
419 rc = nfs4_proc_getdeviceinfo(server, pdev);
420 dprintk("%s getdevice info returns %d\n", __func__, rc);
421 if (rc)
422 goto out_free;
423
424 /*
425 * Found new device, need to decode it and then add it to the
426 * list of known devices for this mountpoint.
427 */
428 dsaddr = decode_and_add_device(inode, pdev);
429out_free:
430 if (pdev->area != NULL)
431 vunmap(pdev->area);
432 for (i = 0; i < max_pages; i++)
433 __free_page(pages[i]);
434 kfree(pages);
435 kfree(pdev);
436 dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
437 return dsaddr;
438}
439
440struct nfs4_file_layout_dsaddr *
441nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
442{
443 struct pnfs_deviceid_node *d;
444
445 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
446 return (d == NULL) ? NULL :
447 container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
448}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 089da5b5d20a..32c8758c99fd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -55,6 +55,7 @@
55#include "internal.h" 55#include "internal.h"
56#include "iostat.h" 56#include "iostat.h"
57#include "callback.h" 57#include "callback.h"
58#include "pnfs.h"
58 59
59#define NFSDBG_FACILITY NFSDBG_PROC 60#define NFSDBG_FACILITY NFSDBG_PROC
60 61
@@ -129,7 +130,8 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE
129 | FATTR4_WORD0_MAXREAD 130 | FATTR4_WORD0_MAXREAD
130 | FATTR4_WORD0_MAXWRITE 131 | FATTR4_WORD0_MAXWRITE
131 | FATTR4_WORD0_LEASE_TIME, 132 | FATTR4_WORD0_LEASE_TIME,
132 0 133 FATTR4_WORD1_TIME_DELTA
134 | FATTR4_WORD1_FS_LAYOUT_TYPES
133}; 135};
134 136
135const u32 nfs4_fs_locations_bitmap[2] = { 137const u32 nfs4_fs_locations_bitmap[2] = {
@@ -255,9 +257,6 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
255 nfs4_state_mark_reclaim_nograce(clp, state); 257 nfs4_state_mark_reclaim_nograce(clp, state);
256 goto do_state_recovery; 258 goto do_state_recovery;
257 case -NFS4ERR_STALE_STATEID: 259 case -NFS4ERR_STALE_STATEID:
258 if (state == NULL)
259 break;
260 nfs4_state_mark_reclaim_reboot(clp, state);
261 case -NFS4ERR_STALE_CLIENTID: 260 case -NFS4ERR_STALE_CLIENTID:
262 case -NFS4ERR_EXPIRED: 261 case -NFS4ERR_EXPIRED:
263 goto do_state_recovery; 262 goto do_state_recovery;
@@ -334,10 +333,12 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp
334 * Must be called while holding tbl->slot_tbl_lock 333 * Must be called while holding tbl->slot_tbl_lock
335 */ 334 */
336static void 335static void
337nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) 336nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot)
338{ 337{
338 int free_slotid = free_slot - tbl->slots;
339 int slotid = free_slotid; 339 int slotid = free_slotid;
340 340
341 BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE);
341 /* clear used bit in bitmap */ 342 /* clear used bit in bitmap */
342 __clear_bit(slotid, tbl->used_slots); 343 __clear_bit(slotid, tbl->used_slots);
343 344
@@ -379,7 +380,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
379 struct nfs4_slot_table *tbl; 380 struct nfs4_slot_table *tbl;
380 381
381 tbl = &res->sr_session->fc_slot_table; 382 tbl = &res->sr_session->fc_slot_table;
382 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { 383 if (!res->sr_slot) {
383 /* just wake up the next guy waiting since 384 /* just wake up the next guy waiting since
384 * we may have not consumed a slot after all */ 385 * we may have not consumed a slot after all */
385 dprintk("%s: No slot\n", __func__); 386 dprintk("%s: No slot\n", __func__);
@@ -387,17 +388,15 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res)
387 } 388 }
388 389
389 spin_lock(&tbl->slot_tbl_lock); 390 spin_lock(&tbl->slot_tbl_lock);
390 nfs4_free_slot(tbl, res->sr_slotid); 391 nfs4_free_slot(tbl, res->sr_slot);
391 nfs41_check_drain_session_complete(res->sr_session); 392 nfs41_check_drain_session_complete(res->sr_session);
392 spin_unlock(&tbl->slot_tbl_lock); 393 spin_unlock(&tbl->slot_tbl_lock);
393 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 394 res->sr_slot = NULL;
394} 395}
395 396
396static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) 397static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res)
397{ 398{
398 unsigned long timestamp; 399 unsigned long timestamp;
399 struct nfs4_slot_table *tbl;
400 struct nfs4_slot *slot;
401 struct nfs_client *clp; 400 struct nfs_client *clp;
402 401
403 /* 402 /*
@@ -410,17 +409,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
410 res->sr_status = NFS_OK; 409 res->sr_status = NFS_OK;
411 410
412 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ 411 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */
413 if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) 412 if (!res->sr_slot)
414 goto out; 413 goto out;
415 414
416 tbl = &res->sr_session->fc_slot_table;
417 slot = tbl->slots + res->sr_slotid;
418
419 /* Check the SEQUENCE operation status */ 415 /* Check the SEQUENCE operation status */
420 switch (res->sr_status) { 416 switch (res->sr_status) {
421 case 0: 417 case 0:
422 /* Update the slot's sequence and clientid lease timer */ 418 /* Update the slot's sequence and clientid lease timer */
423 ++slot->seq_nr; 419 ++res->sr_slot->seq_nr;
424 timestamp = res->sr_renewal_time; 420 timestamp = res->sr_renewal_time;
425 clp = res->sr_session->clp; 421 clp = res->sr_session->clp;
426 do_renew_lease(clp, timestamp); 422 do_renew_lease(clp, timestamp);
@@ -433,12 +429,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
433 * returned NFS4ERR_DELAY as per Section 2.10.6.2 429 * returned NFS4ERR_DELAY as per Section 2.10.6.2
434 * of RFC5661. 430 * of RFC5661.
435 */ 431 */
436 dprintk("%s: slot=%d seq=%d: Operation in progress\n", 432 dprintk("%s: slot=%ld seq=%d: Operation in progress\n",
437 __func__, res->sr_slotid, slot->seq_nr); 433 __func__,
434 res->sr_slot - res->sr_session->fc_slot_table.slots,
435 res->sr_slot->seq_nr);
438 goto out_retry; 436 goto out_retry;
439 default: 437 default:
440 /* Just update the slot sequence no. */ 438 /* Just update the slot sequence no. */
441 ++slot->seq_nr; 439 ++res->sr_slot->seq_nr;
442 } 440 }
443out: 441out:
444 /* The session may be reset by one of the error handlers. */ 442 /* The session may be reset by one of the error handlers. */
@@ -505,10 +503,9 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
505 503
506 dprintk("--> %s\n", __func__); 504 dprintk("--> %s\n", __func__);
507 /* slot already allocated? */ 505 /* slot already allocated? */
508 if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) 506 if (res->sr_slot != NULL)
509 return 0; 507 return 0;
510 508
511 res->sr_slotid = NFS4_MAX_SLOT_TABLE;
512 tbl = &session->fc_slot_table; 509 tbl = &session->fc_slot_table;
513 510
514 spin_lock(&tbl->slot_tbl_lock); 511 spin_lock(&tbl->slot_tbl_lock);
@@ -550,7 +547,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
550 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); 547 dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr);
551 548
552 res->sr_session = session; 549 res->sr_session = session;
553 res->sr_slotid = slotid; 550 res->sr_slot = slot;
554 res->sr_renewal_time = jiffies; 551 res->sr_renewal_time = jiffies;
555 res->sr_status_flags = 0; 552 res->sr_status_flags = 0;
556 /* 553 /*
@@ -576,8 +573,9 @@ int nfs4_setup_sequence(const struct nfs_server *server,
576 goto out; 573 goto out;
577 } 574 }
578 575
579 dprintk("--> %s clp %p session %p sr_slotid %d\n", 576 dprintk("--> %s clp %p session %p sr_slot %ld\n",
580 __func__, session->clp, session, res->sr_slotid); 577 __func__, session->clp, session, res->sr_slot ?
578 res->sr_slot - session->fc_slot_table.slots : -1);
581 579
582 ret = nfs41_setup_sequence(session, args, res, cache_reply, 580 ret = nfs41_setup_sequence(session, args, res, cache_reply,
583 task); 581 task);
@@ -650,7 +648,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
650 .callback_data = &data 648 .callback_data = &data
651 }; 649 };
652 650
653 res->sr_slotid = NFS4_MAX_SLOT_TABLE; 651 res->sr_slot = NULL;
654 if (privileged) 652 if (privileged)
655 task_setup.callback_ops = &nfs41_call_priv_sync_ops; 653 task_setup.callback_ops = &nfs41_call_priv_sync_ops;
656 task = rpc_run_task(&task_setup); 654 task = rpc_run_task(&task_setup);
@@ -735,7 +733,6 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
735 p->o_res.server = p->o_arg.server; 733 p->o_res.server = p->o_arg.server;
736 nfs_fattr_init(&p->f_attr); 734 nfs_fattr_init(&p->f_attr);
737 nfs_fattr_init(&p->dir_attr); 735 nfs_fattr_init(&p->dir_attr);
738 p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
739} 736}
740 737
741static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 738static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
@@ -1120,6 +1117,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1120 clear_bit(NFS_DELEGATED_STATE, &state->flags); 1117 clear_bit(NFS_DELEGATED_STATE, &state->flags);
1121 smp_rmb(); 1118 smp_rmb();
1122 if (state->n_rdwr != 0) { 1119 if (state->n_rdwr != 0) {
1120 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1123 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); 1121 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
1124 if (ret != 0) 1122 if (ret != 0)
1125 return ret; 1123 return ret;
@@ -1127,6 +1125,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1127 return -ESTALE; 1125 return -ESTALE;
1128 } 1126 }
1129 if (state->n_wronly != 0) { 1127 if (state->n_wronly != 0) {
1128 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1130 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); 1129 ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
1131 if (ret != 0) 1130 if (ret != 0)
1132 return ret; 1131 return ret;
@@ -1134,6 +1133,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
1134 return -ESTALE; 1133 return -ESTALE;
1135 } 1134 }
1136 if (state->n_rdonly != 0) { 1135 if (state->n_rdonly != 0) {
1136 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1137 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); 1137 ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
1138 if (ret != 0) 1138 if (ret != 0)
1139 return ret; 1139 return ret;
@@ -1188,7 +1188,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
1188 int err; 1188 int err;
1189 do { 1189 do {
1190 err = _nfs4_do_open_reclaim(ctx, state); 1190 err = _nfs4_do_open_reclaim(ctx, state);
1191 if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) 1191 if (err != -NFS4ERR_DELAY)
1192 break; 1192 break;
1193 nfs4_handle_exception(server, err, &exception); 1193 nfs4_handle_exception(server, err, &exception);
1194 } while (exception.retry); 1194 } while (exception.retry);
@@ -1258,6 +1258,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1258 case -NFS4ERR_ADMIN_REVOKED: 1258 case -NFS4ERR_ADMIN_REVOKED:
1259 case -NFS4ERR_BAD_STATEID: 1259 case -NFS4ERR_BAD_STATEID:
1260 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1260 nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
1261 case -EKEYEXPIRED:
1262 /*
1263 * User RPCSEC_GSS context has expired.
1264 * We cannot recover this stateid now, so
1265 * skip it and allow recovery thread to
1266 * proceed.
1267 */
1261 case -ENOMEM: 1268 case -ENOMEM:
1262 err = 0; 1269 err = 0;
1263 goto out; 1270 goto out;
@@ -1605,7 +1612,6 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state
1605 goto out; 1612 goto out;
1606 case -NFS4ERR_GRACE: 1613 case -NFS4ERR_GRACE:
1607 case -NFS4ERR_DELAY: 1614 case -NFS4ERR_DELAY:
1608 case -EKEYEXPIRED:
1609 nfs4_handle_exception(server, err, &exception); 1615 nfs4_handle_exception(server, err, &exception);
1610 err = 0; 1616 err = 0;
1611 } 1617 }
@@ -1975,7 +1981,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
1975 calldata->res.fattr = &calldata->fattr; 1981 calldata->res.fattr = &calldata->fattr;
1976 calldata->res.seqid = calldata->arg.seqid; 1982 calldata->res.seqid = calldata->arg.seqid;
1977 calldata->res.server = server; 1983 calldata->res.server = server;
1978 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
1979 path_get(path); 1984 path_get(path);
1980 calldata->path = *path; 1985 calldata->path = *path;
1981 1986
@@ -1998,120 +2003,17 @@ out:
1998 return status; 2003 return status;
1999} 2004}
2000 2005
2001static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode) 2006static struct inode *
2007nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr)
2002{ 2008{
2003 struct file *filp;
2004 int ret;
2005
2006 /* If the open_intent is for execute, we have an extra check to make */
2007 if (fmode & FMODE_EXEC) {
2008 ret = nfs_may_open(state->inode,
2009 state->owner->so_cred,
2010 nd->intent.open.flags);
2011 if (ret < 0)
2012 goto out_close;
2013 }
2014 filp = lookup_instantiate_filp(nd, path->dentry, NULL);
2015 if (!IS_ERR(filp)) {
2016 struct nfs_open_context *ctx;
2017 ctx = nfs_file_open_context(filp);
2018 ctx->state = state;
2019 return 0;
2020 }
2021 ret = PTR_ERR(filp);
2022out_close:
2023 nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE));
2024 return ret;
2025}
2026
2027struct dentry *
2028nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
2029{
2030 struct path path = {
2031 .mnt = nd->path.mnt,
2032 .dentry = dentry,
2033 };
2034 struct dentry *parent;
2035 struct iattr attr;
2036 struct rpc_cred *cred;
2037 struct nfs4_state *state; 2009 struct nfs4_state *state;
2038 struct dentry *res;
2039 int open_flags = nd->intent.open.flags;
2040 fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
2041
2042 if (nd->flags & LOOKUP_CREATE) {
2043 attr.ia_mode = nd->intent.open.create_mode;
2044 attr.ia_valid = ATTR_MODE;
2045 if (!IS_POSIXACL(dir))
2046 attr.ia_mode &= ~current_umask();
2047 } else {
2048 open_flags &= ~O_EXCL;
2049 attr.ia_valid = 0;
2050 BUG_ON(open_flags & O_CREAT);
2051 }
2052 2010
2053 cred = rpc_lookup_cred();
2054 if (IS_ERR(cred))
2055 return (struct dentry *)cred;
2056 parent = dentry->d_parent;
2057 /* Protect against concurrent sillydeletes */ 2011 /* Protect against concurrent sillydeletes */
2058 nfs_block_sillyrename(parent); 2012 state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred);
2059 state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); 2013 if (IS_ERR(state))
2060 put_rpccred(cred); 2014 return ERR_CAST(state);
2061 if (IS_ERR(state)) { 2015 ctx->state = state;
2062 if (PTR_ERR(state) == -ENOENT) { 2016 return igrab(state->inode);
2063 d_add(dentry, NULL);
2064 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2065 }
2066 nfs_unblock_sillyrename(parent);
2067 return (struct dentry *)state;
2068 }
2069 res = d_add_unique(dentry, igrab(state->inode));
2070 if (res != NULL)
2071 path.dentry = res;
2072 nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir));
2073 nfs_unblock_sillyrename(parent);
2074 nfs4_intent_set_file(nd, &path, state, fmode);
2075 return res;
2076}
2077
2078int
2079nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
2080{
2081 struct path path = {
2082 .mnt = nd->path.mnt,
2083 .dentry = dentry,
2084 };
2085 struct rpc_cred *cred;
2086 struct nfs4_state *state;
2087 fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE);
2088
2089 cred = rpc_lookup_cred();
2090 if (IS_ERR(cred))
2091 return PTR_ERR(cred);
2092 state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred);
2093 put_rpccred(cred);
2094 if (IS_ERR(state)) {
2095 switch (PTR_ERR(state)) {
2096 case -EPERM:
2097 case -EACCES:
2098 case -EDQUOT:
2099 case -ENOSPC:
2100 case -EROFS:
2101 return PTR_ERR(state);
2102 default:
2103 goto out_drop;
2104 }
2105 }
2106 if (state->inode == dentry->d_inode) {
2107 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2108 nfs4_intent_set_file(nd, &path, state, fmode);
2109 return 1;
2110 }
2111 nfs4_close_sync(&path, state, fmode);
2112out_drop:
2113 d_drop(dentry);
2114 return 0;
2115} 2017}
2116 2018
2117static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) 2019static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
@@ -2568,36 +2470,34 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
2568 2470
2569static int 2471static int
2570nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 2472nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2571 int flags, struct nameidata *nd) 2473 int flags, struct nfs_open_context *ctx)
2572{ 2474{
2573 struct path path = { 2475 struct path my_path = {
2574 .mnt = nd->path.mnt,
2575 .dentry = dentry, 2476 .dentry = dentry,
2576 }; 2477 };
2478 struct path *path = &my_path;
2577 struct nfs4_state *state; 2479 struct nfs4_state *state;
2578 struct rpc_cred *cred; 2480 struct rpc_cred *cred = NULL;
2579 fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE); 2481 fmode_t fmode = 0;
2580 int status = 0; 2482 int status = 0;
2581 2483
2582 cred = rpc_lookup_cred(); 2484 if (ctx != NULL) {
2583 if (IS_ERR(cred)) { 2485 cred = ctx->cred;
2584 status = PTR_ERR(cred); 2486 path = &ctx->path;
2585 goto out; 2487 fmode = ctx->mode;
2586 } 2488 }
2587 state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred); 2489 state = nfs4_do_open(dir, path, fmode, flags, sattr, cred);
2588 d_drop(dentry); 2490 d_drop(dentry);
2589 if (IS_ERR(state)) { 2491 if (IS_ERR(state)) {
2590 status = PTR_ERR(state); 2492 status = PTR_ERR(state);
2591 goto out_putcred; 2493 goto out;
2592 } 2494 }
2593 d_add(dentry, igrab(state->inode)); 2495 d_add(dentry, igrab(state->inode));
2594 nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); 2496 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2595 if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) 2497 if (ctx != NULL)
2596 status = nfs4_intent_set_file(nd, &path, state, fmode); 2498 ctx->state = state;
2597 else 2499 else
2598 nfs4_close_sync(&path, state, fmode); 2500 nfs4_close_sync(path, state, fmode);
2599out_putcred:
2600 put_rpccred(cred);
2601out: 2501out:
2602 return status; 2502 return status;
2603} 2503}
@@ -2655,6 +2555,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
2655 2555
2656 args->bitmask = server->cache_consistency_bitmask; 2556 args->bitmask = server->cache_consistency_bitmask;
2657 res->server = server; 2557 res->server = server;
2558 res->seq_res.sr_slot = NULL;
2658 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; 2559 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
2659} 2560}
2660 2561
@@ -2671,18 +2572,46 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
2671 return 1; 2572 return 1;
2672} 2573}
2673 2574
2575static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
2576{
2577 struct nfs_server *server = NFS_SERVER(dir);
2578 struct nfs_renameargs *arg = msg->rpc_argp;
2579 struct nfs_renameres *res = msg->rpc_resp;
2580
2581 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME];
2582 arg->bitmask = server->attr_bitmask;
2583 res->server = server;
2584}
2585
2586static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
2587 struct inode *new_dir)
2588{
2589 struct nfs_renameres *res = task->tk_msg.rpc_resp;
2590
2591 if (!nfs4_sequence_done(task, &res->seq_res))
2592 return 0;
2593 if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
2594 return 0;
2595
2596 update_changeattr(old_dir, &res->old_cinfo);
2597 nfs_post_op_update_inode(old_dir, res->old_fattr);
2598 update_changeattr(new_dir, &res->new_cinfo);
2599 nfs_post_op_update_inode(new_dir, res->new_fattr);
2600 return 1;
2601}
2602
2674static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, 2603static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2675 struct inode *new_dir, struct qstr *new_name) 2604 struct inode *new_dir, struct qstr *new_name)
2676{ 2605{
2677 struct nfs_server *server = NFS_SERVER(old_dir); 2606 struct nfs_server *server = NFS_SERVER(old_dir);
2678 struct nfs4_rename_arg arg = { 2607 struct nfs_renameargs arg = {
2679 .old_dir = NFS_FH(old_dir), 2608 .old_dir = NFS_FH(old_dir),
2680 .new_dir = NFS_FH(new_dir), 2609 .new_dir = NFS_FH(new_dir),
2681 .old_name = old_name, 2610 .old_name = old_name,
2682 .new_name = new_name, 2611 .new_name = new_name,
2683 .bitmask = server->attr_bitmask, 2612 .bitmask = server->attr_bitmask,
2684 }; 2613 };
2685 struct nfs4_rename_res res = { 2614 struct nfs_renameres res = {
2686 .server = server, 2615 .server = server,
2687 }; 2616 };
2688 struct rpc_message msg = { 2617 struct rpc_message msg = {
@@ -2896,15 +2825,16 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
2896} 2825}
2897 2826
2898static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 2827static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2899 u64 cookie, struct page *page, unsigned int count, int plus) 2828 u64 cookie, struct page **pages, unsigned int count, int plus)
2900{ 2829{
2901 struct inode *dir = dentry->d_inode; 2830 struct inode *dir = dentry->d_inode;
2902 struct nfs4_readdir_arg args = { 2831 struct nfs4_readdir_arg args = {
2903 .fh = NFS_FH(dir), 2832 .fh = NFS_FH(dir),
2904 .pages = &page, 2833 .pages = pages,
2905 .pgbase = 0, 2834 .pgbase = 0,
2906 .count = count, 2835 .count = count,
2907 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, 2836 .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
2837 .plus = plus,
2908 }; 2838 };
2909 struct nfs4_readdir_res res; 2839 struct nfs4_readdir_res res;
2910 struct rpc_message msg = { 2840 struct rpc_message msg = {
@@ -2932,14 +2862,14 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2932} 2862}
2933 2863
2934static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 2864static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2935 u64 cookie, struct page *page, unsigned int count, int plus) 2865 u64 cookie, struct page **pages, unsigned int count, int plus)
2936{ 2866{
2937 struct nfs4_exception exception = { }; 2867 struct nfs4_exception exception = { };
2938 int err; 2868 int err;
2939 do { 2869 do {
2940 err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), 2870 err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode),
2941 _nfs4_proc_readdir(dentry, cred, cookie, 2871 _nfs4_proc_readdir(dentry, cred, cookie,
2942 page, count, plus), 2872 pages, count, plus),
2943 &exception); 2873 &exception);
2944 } while (exception.retry); 2874 } while (exception.retry);
2945 return err; 2875 return err;
@@ -3490,9 +3420,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3490 nfs4_state_mark_reclaim_nograce(clp, state); 3420 nfs4_state_mark_reclaim_nograce(clp, state);
3491 goto do_state_recovery; 3421 goto do_state_recovery;
3492 case -NFS4ERR_STALE_STATEID: 3422 case -NFS4ERR_STALE_STATEID:
3493 if (state == NULL)
3494 break;
3495 nfs4_state_mark_reclaim_reboot(clp, state);
3496 case -NFS4ERR_STALE_CLIENTID: 3423 case -NFS4ERR_STALE_CLIENTID:
3497 case -NFS4ERR_EXPIRED: 3424 case -NFS4ERR_EXPIRED:
3498 goto do_state_recovery; 3425 goto do_state_recovery;
@@ -3626,7 +3553,6 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3626 case -NFS4ERR_RESOURCE: 3553 case -NFS4ERR_RESOURCE:
3627 /* The IBM lawyers misread another document! */ 3554 /* The IBM lawyers misread another document! */
3628 case -NFS4ERR_DELAY: 3555 case -NFS4ERR_DELAY:
3629 case -EKEYEXPIRED:
3630 err = nfs4_delay(clp->cl_rpcclient, &timeout); 3556 err = nfs4_delay(clp->cl_rpcclient, &timeout);
3631 } 3557 }
3632 } while (err == 0); 3558 } while (err == 0);
@@ -3721,7 +3647,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
3721 memcpy(&data->stateid, stateid, sizeof(data->stateid)); 3647 memcpy(&data->stateid, stateid, sizeof(data->stateid));
3722 data->res.fattr = &data->fattr; 3648 data->res.fattr = &data->fattr;
3723 data->res.server = server; 3649 data->res.server = server;
3724 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3725 nfs_fattr_init(data->res.fattr); 3650 nfs_fattr_init(data->res.fattr);
3726 data->timestamp = jiffies; 3651 data->timestamp = jiffies;
3727 data->rpc_status = 0; 3652 data->rpc_status = 0;
@@ -3874,7 +3799,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
3874 p->arg.fl = &p->fl; 3799 p->arg.fl = &p->fl;
3875 p->arg.seqid = seqid; 3800 p->arg.seqid = seqid;
3876 p->res.seqid = seqid; 3801 p->res.seqid = seqid;
3877 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
3878 p->arg.stateid = &lsp->ls_stateid; 3802 p->arg.stateid = &lsp->ls_stateid;
3879 p->lsp = lsp; 3803 p->lsp = lsp;
3880 atomic_inc(&lsp->ls_count); 3804 atomic_inc(&lsp->ls_count);
@@ -4054,7 +3978,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
4054 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; 3978 p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
4055 p->arg.lock_owner.id = lsp->ls_id.id; 3979 p->arg.lock_owner.id = lsp->ls_id.id;
4056 p->res.lock_seqid = p->arg.lock_seqid; 3980 p->res.lock_seqid = p->arg.lock_seqid;
4057 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
4058 p->lsp = lsp; 3981 p->lsp = lsp;
4059 p->server = server; 3982 p->server = server;
4060 atomic_inc(&lsp->ls_count); 3983 atomic_inc(&lsp->ls_count);
@@ -4241,7 +4164,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
4241 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) 4164 if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
4242 return 0; 4165 return 0;
4243 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); 4166 err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
4244 if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) 4167 if (err != -NFS4ERR_DELAY)
4245 break; 4168 break;
4246 nfs4_handle_exception(server, err, &exception); 4169 nfs4_handle_exception(server, err, &exception);
4247 } while (exception.retry); 4170 } while (exception.retry);
@@ -4266,7 +4189,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
4266 goto out; 4189 goto out;
4267 case -NFS4ERR_GRACE: 4190 case -NFS4ERR_GRACE:
4268 case -NFS4ERR_DELAY: 4191 case -NFS4ERR_DELAY:
4269 case -EKEYEXPIRED:
4270 nfs4_handle_exception(server, err, &exception); 4192 nfs4_handle_exception(server, err, &exception);
4271 err = 0; 4193 err = 0;
4272 } 4194 }
@@ -4412,13 +4334,21 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4412 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4334 nfs4_state_mark_reclaim_nograce(server->nfs_client, state);
4413 err = 0; 4335 err = 0;
4414 goto out; 4336 goto out;
4337 case -EKEYEXPIRED:
4338 /*
4339 * User RPCSEC_GSS context has expired.
4340 * We cannot recover this stateid now, so
4341 * skip it and allow recovery thread to
4342 * proceed.
4343 */
4344 err = 0;
4345 goto out;
4415 case -ENOMEM: 4346 case -ENOMEM:
4416 case -NFS4ERR_DENIED: 4347 case -NFS4ERR_DENIED:
4417 /* kill_proc(fl->fl_pid, SIGLOST, 1); */ 4348 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
4418 err = 0; 4349 err = 0;
4419 goto out; 4350 goto out;
4420 case -NFS4ERR_DELAY: 4351 case -NFS4ERR_DELAY:
4421 case -EKEYEXPIRED:
4422 break; 4352 break;
4423 } 4353 }
4424 err = nfs4_handle_exception(server, err, &exception); 4354 err = nfs4_handle_exception(server, err, &exception);
@@ -4647,7 +4577,6 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata)
4647 switch (task->tk_status) { 4577 switch (task->tk_status) {
4648 case -NFS4ERR_DELAY: 4578 case -NFS4ERR_DELAY:
4649 case -NFS4ERR_GRACE: 4579 case -NFS4ERR_GRACE:
4650 case -EKEYEXPIRED:
4651 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); 4580 dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status);
4652 rpc_delay(task, NFS4_POLL_RETRY_MIN); 4581 rpc_delay(task, NFS4_POLL_RETRY_MIN);
4653 task->tk_status = 0; 4582 task->tk_status = 0;
@@ -4687,7 +4616,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4687 }; 4616 };
4688 int status; 4617 int status;
4689 4618
4690 res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
4691 dprintk("--> %s\n", __func__); 4619 dprintk("--> %s\n", __func__);
4692 task = rpc_run_task(&task_setup); 4620 task = rpc_run_task(&task_setup);
4693 4621
@@ -4914,49 +4842,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
4914 args->bc_attrs.max_reqs); 4842 args->bc_attrs.max_reqs);
4915} 4843}
4916 4844
4917static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) 4845static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
4918{ 4846{
4919 if (rcvd <= sent) 4847 struct nfs4_channel_attrs *sent = &args->fc_attrs;
4920 return 0; 4848 struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
4921 printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " 4849
4922 "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); 4850 if (rcvd->headerpadsz > sent->headerpadsz)
4923 return -EINVAL; 4851 return -EINVAL;
4852 if (rcvd->max_resp_sz > sent->max_resp_sz)
4853 return -EINVAL;
4854 /*
4855 * Our requested max_ops is the minimum we need; we're not
4856 * prepared to break up compounds into smaller pieces than that.
4857 * So, no point even trying to continue if the server won't
4858 * cooperate:
4859 */
4860 if (rcvd->max_ops < sent->max_ops)
4861 return -EINVAL;
4862 if (rcvd->max_reqs == 0)
4863 return -EINVAL;
4864 return 0;
4924} 4865}
4925 4866
4926#define _verify_fore_channel_attr(_name_) \ 4867static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session)
4927 _verify_channel_attr("fore", #_name_, \ 4868{
4928 args->fc_attrs._name_, \ 4869 struct nfs4_channel_attrs *sent = &args->bc_attrs;
4929 session->fc_attrs._name_) 4870 struct nfs4_channel_attrs *rcvd = &session->bc_attrs;
4930 4871
4931#define _verify_back_channel_attr(_name_) \ 4872 if (rcvd->max_rqst_sz > sent->max_rqst_sz)
4932 _verify_channel_attr("back", #_name_, \ 4873 return -EINVAL;
4933 args->bc_attrs._name_, \ 4874 if (rcvd->max_resp_sz < sent->max_resp_sz)
4934 session->bc_attrs._name_) 4875 return -EINVAL;
4876 if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached)
4877 return -EINVAL;
4878 /* These would render the backchannel useless: */
4879 if (rcvd->max_ops == 0)
4880 return -EINVAL;
4881 if (rcvd->max_reqs == 0)
4882 return -EINVAL;
4883 return 0;
4884}
4935 4885
4936/*
4937 * The server is not allowed to increase the fore channel header pad size,
4938 * maximum response size, or maximum number of operations.
4939 *
4940 * The back channel attributes are only negotiatied down: We send what the
4941 * (back channel) server insists upon.
4942 */
4943static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, 4886static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args,
4944 struct nfs4_session *session) 4887 struct nfs4_session *session)
4945{ 4888{
4946 int ret = 0; 4889 int ret;
4947
4948 ret |= _verify_fore_channel_attr(headerpadsz);
4949 ret |= _verify_fore_channel_attr(max_resp_sz);
4950 ret |= _verify_fore_channel_attr(max_ops);
4951
4952 ret |= _verify_back_channel_attr(headerpadsz);
4953 ret |= _verify_back_channel_attr(max_rqst_sz);
4954 ret |= _verify_back_channel_attr(max_resp_sz);
4955 ret |= _verify_back_channel_attr(max_resp_sz_cached);
4956 ret |= _verify_back_channel_attr(max_ops);
4957 ret |= _verify_back_channel_attr(max_reqs);
4958 4890
4959 return ret; 4891 ret = nfs4_verify_fore_channel_attrs(args, session);
4892 if (ret)
4893 return ret;
4894 return nfs4_verify_back_channel_attrs(args, session);
4960} 4895}
4961 4896
4962static int _nfs4_proc_create_session(struct nfs_client *clp) 4897static int _nfs4_proc_create_session(struct nfs_client *clp)
@@ -5111,7 +5046,6 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5111{ 5046{
5112 switch(task->tk_status) { 5047 switch(task->tk_status) {
5113 case -NFS4ERR_DELAY: 5048 case -NFS4ERR_DELAY:
5114 case -EKEYEXPIRED:
5115 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5049 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5116 return -EAGAIN; 5050 return -EAGAIN;
5117 default: 5051 default:
@@ -5180,12 +5114,11 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_
5180 5114
5181 if (!atomic_inc_not_zero(&clp->cl_count)) 5115 if (!atomic_inc_not_zero(&clp->cl_count))
5182 return ERR_PTR(-EIO); 5116 return ERR_PTR(-EIO);
5183 calldata = kmalloc(sizeof(*calldata), GFP_NOFS); 5117 calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
5184 if (calldata == NULL) { 5118 if (calldata == NULL) {
5185 nfs_put_client(clp); 5119 nfs_put_client(clp);
5186 return ERR_PTR(-ENOMEM); 5120 return ERR_PTR(-ENOMEM);
5187 } 5121 }
5188 calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE;
5189 msg.rpc_argp = &calldata->args; 5122 msg.rpc_argp = &calldata->args;
5190 msg.rpc_resp = &calldata->res; 5123 msg.rpc_resp = &calldata->res;
5191 calldata->clp = clp; 5124 calldata->clp = clp;
@@ -5254,7 +5187,6 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5254 case -NFS4ERR_WRONG_CRED: /* What to do here? */ 5187 case -NFS4ERR_WRONG_CRED: /* What to do here? */
5255 break; 5188 break;
5256 case -NFS4ERR_DELAY: 5189 case -NFS4ERR_DELAY:
5257 case -EKEYEXPIRED:
5258 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5190 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5259 return -EAGAIN; 5191 return -EAGAIN;
5260 default: 5192 default:
@@ -5317,7 +5249,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5317 goto out; 5249 goto out;
5318 calldata->clp = clp; 5250 calldata->clp = clp;
5319 calldata->arg.one_fs = 0; 5251 calldata->arg.one_fs = 0;
5320 calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
5321 5252
5322 msg.rpc_argp = &calldata->arg; 5253 msg.rpc_argp = &calldata->arg;
5323 msg.rpc_resp = &calldata->res; 5254 msg.rpc_resp = &calldata->res;
@@ -5333,6 +5264,147 @@ out:
5333 dprintk("<-- %s status=%d\n", __func__, status); 5264 dprintk("<-- %s status=%d\n", __func__, status);
5334 return status; 5265 return status;
5335} 5266}
5267
5268static void
5269nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
5270{
5271 struct nfs4_layoutget *lgp = calldata;
5272 struct inode *ino = lgp->args.inode;
5273 struct nfs_server *server = NFS_SERVER(ino);
5274
5275 dprintk("--> %s\n", __func__);
5276 if (nfs4_setup_sequence(server, &lgp->args.seq_args,
5277 &lgp->res.seq_res, 0, task))
5278 return;
5279 rpc_call_start(task);
5280}
5281
5282static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
5283{
5284 struct nfs4_layoutget *lgp = calldata;
5285 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5286
5287 dprintk("--> %s\n", __func__);
5288
5289 if (!nfs4_sequence_done(task, &lgp->res.seq_res))
5290 return;
5291
5292 switch (task->tk_status) {
5293 case 0:
5294 break;
5295 case -NFS4ERR_LAYOUTTRYLATER:
5296 case -NFS4ERR_RECALLCONFLICT:
5297 task->tk_status = -NFS4ERR_DELAY;
5298 /* Fall through */
5299 default:
5300 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5301 rpc_restart_call_prepare(task);
5302 return;
5303 }
5304 }
5305 lgp->status = task->tk_status;
5306 dprintk("<-- %s\n", __func__);
5307}
5308
5309static void nfs4_layoutget_release(void *calldata)
5310{
5311 struct nfs4_layoutget *lgp = calldata;
5312
5313 dprintk("--> %s\n", __func__);
5314 put_layout_hdr(lgp->args.inode);
5315 if (lgp->res.layout.buf != NULL)
5316 free_page((unsigned long) lgp->res.layout.buf);
5317 put_nfs_open_context(lgp->args.ctx);
5318 kfree(calldata);
5319 dprintk("<-- %s\n", __func__);
5320}
5321
5322static const struct rpc_call_ops nfs4_layoutget_call_ops = {
5323 .rpc_call_prepare = nfs4_layoutget_prepare,
5324 .rpc_call_done = nfs4_layoutget_done,
5325 .rpc_release = nfs4_layoutget_release,
5326};
5327
5328int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5329{
5330 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
5331 struct rpc_task *task;
5332 struct rpc_message msg = {
5333 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET],
5334 .rpc_argp = &lgp->args,
5335 .rpc_resp = &lgp->res,
5336 };
5337 struct rpc_task_setup task_setup_data = {
5338 .rpc_client = server->client,
5339 .rpc_message = &msg,
5340 .callback_ops = &nfs4_layoutget_call_ops,
5341 .callback_data = lgp,
5342 .flags = RPC_TASK_ASYNC,
5343 };
5344 int status = 0;
5345
5346 dprintk("--> %s\n", __func__);
5347
5348 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS);
5349 if (lgp->res.layout.buf == NULL) {
5350 nfs4_layoutget_release(lgp);
5351 return -ENOMEM;
5352 }
5353
5354 lgp->res.seq_res.sr_slot = NULL;
5355 task = rpc_run_task(&task_setup_data);
5356 if (IS_ERR(task))
5357 return PTR_ERR(task);
5358 status = nfs4_wait_for_completion_rpc_task(task);
5359 if (status != 0)
5360 goto out;
5361 status = lgp->status;
5362 if (status != 0)
5363 goto out;
5364 status = pnfs_layout_process(lgp);
5365out:
5366 rpc_put_task(task);
5367 dprintk("<-- %s status=%d\n", __func__, status);
5368 return status;
5369}
5370
5371static int
5372_nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5373{
5374 struct nfs4_getdeviceinfo_args args = {
5375 .pdev = pdev,
5376 };
5377 struct nfs4_getdeviceinfo_res res = {
5378 .pdev = pdev,
5379 };
5380 struct rpc_message msg = {
5381 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO],
5382 .rpc_argp = &args,
5383 .rpc_resp = &res,
5384 };
5385 int status;
5386
5387 dprintk("--> %s\n", __func__);
5388 status = nfs4_call_sync(server, &msg, &args, &res, 0);
5389 dprintk("<-- %s status=%d\n", __func__, status);
5390
5391 return status;
5392}
5393
5394int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5395{
5396 struct nfs4_exception exception = { };
5397 int err;
5398
5399 do {
5400 err = nfs4_handle_exception(server,
5401 _nfs4_proc_getdeviceinfo(server, pdev),
5402 &exception);
5403 } while (exception.retry);
5404 return err;
5405}
5406EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5407
5336#endif /* CONFIG_NFS_V4_1 */ 5408#endif /* CONFIG_NFS_V4_1 */
5337 5409
5338struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5410struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5443,6 +5515,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5443 .unlink_setup = nfs4_proc_unlink_setup, 5515 .unlink_setup = nfs4_proc_unlink_setup,
5444 .unlink_done = nfs4_proc_unlink_done, 5516 .unlink_done = nfs4_proc_unlink_done,
5445 .rename = nfs4_proc_rename, 5517 .rename = nfs4_proc_rename,
5518 .rename_setup = nfs4_proc_rename_setup,
5519 .rename_done = nfs4_proc_rename_done,
5446 .link = nfs4_proc_link, 5520 .link = nfs4_proc_link,
5447 .symlink = nfs4_proc_symlink, 5521 .symlink = nfs4_proc_symlink,
5448 .mkdir = nfs4_proc_mkdir, 5522 .mkdir = nfs4_proc_mkdir,
@@ -5463,6 +5537,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5463 .lock = nfs4_proc_lock, 5537 .lock = nfs4_proc_lock,
5464 .clear_acl_cache = nfs4_zap_acl_attr, 5538 .clear_acl_cache = nfs4_zap_acl_attr,
5465 .close_context = nfs4_close_context, 5539 .close_context = nfs4_close_context,
5540 .open_context = nfs4_atomic_open,
5466}; 5541};
5467 5542
5468/* 5543/*
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 96524c5dca6b..f575a3126737 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -46,6 +46,7 @@
46#include <linux/kthread.h> 46#include <linux/kthread.h>
47#include <linux/module.h> 47#include <linux/module.h>
48#include <linux/random.h> 48#include <linux/random.h>
49#include <linux/ratelimit.h>
49#include <linux/workqueue.h> 50#include <linux/workqueue.h>
50#include <linux/bitops.h> 51#include <linux/bitops.h>
51 52
@@ -53,6 +54,7 @@
53#include "callback.h" 54#include "callback.h"
54#include "delegation.h" 55#include "delegation.h"
55#include "internal.h" 56#include "internal.h"
57#include "pnfs.h"
56 58
57#define OPENOWNER_POOL_SIZE 8 59#define OPENOWNER_POOL_SIZE 8
58 60
@@ -1063,6 +1065,14 @@ restart:
1063 /* Mark the file as being 'closed' */ 1065 /* Mark the file as being 'closed' */
1064 state->state = 0; 1066 state->state = 0;
1065 break; 1067 break;
1068 case -EKEYEXPIRED:
1069 /*
1070 * User RPCSEC_GSS context has expired.
1071 * We cannot recover this stateid now, so
1072 * skip it and allow recovery thread to
1073 * proceed.
1074 */
1075 break;
1066 case -NFS4ERR_ADMIN_REVOKED: 1076 case -NFS4ERR_ADMIN_REVOKED:
1067 case -NFS4ERR_STALE_STATEID: 1077 case -NFS4ERR_STALE_STATEID:
1068 case -NFS4ERR_BAD_STATEID: 1078 case -NFS4ERR_BAD_STATEID:
@@ -1138,16 +1148,14 @@ static void nfs4_reclaim_complete(struct nfs_client *clp,
1138 (void)ops->reclaim_complete(clp); 1148 (void)ops->reclaim_complete(clp);
1139} 1149}
1140 1150
1141static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) 1151static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
1142{ 1152{
1143 struct nfs4_state_owner *sp; 1153 struct nfs4_state_owner *sp;
1144 struct rb_node *pos; 1154 struct rb_node *pos;
1145 struct nfs4_state *state; 1155 struct nfs4_state *state;
1146 1156
1147 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) 1157 if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1148 return; 1158 return 0;
1149
1150 nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
1151 1159
1152 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { 1160 for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
1153 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); 1161 sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
@@ -1161,6 +1169,14 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1161 } 1169 }
1162 1170
1163 nfs_delegation_reap_unclaimed(clp); 1171 nfs_delegation_reap_unclaimed(clp);
1172 return 1;
1173}
1174
1175static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1176{
1177 if (!nfs4_state_clear_reclaim_reboot(clp))
1178 return;
1179 nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops);
1164} 1180}
1165 1181
1166static void nfs_delegation_clear_all(struct nfs_client *clp) 1182static void nfs_delegation_clear_all(struct nfs_client *clp)
@@ -1175,6 +1191,14 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
1175 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); 1191 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
1176} 1192}
1177 1193
1194static void nfs4_warn_keyexpired(const char *s)
1195{
1196 printk_ratelimited(KERN_WARNING "Error: state manager"
1197 " encountered RPCSEC_GSS session"
1198 " expired against NFSv4 server %s.\n",
1199 s);
1200}
1201
1178static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) 1202static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1179{ 1203{
1180 switch (error) { 1204 switch (error) {
@@ -1187,7 +1211,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1187 case -NFS4ERR_STALE_CLIENTID: 1211 case -NFS4ERR_STALE_CLIENTID:
1188 case -NFS4ERR_LEASE_MOVED: 1212 case -NFS4ERR_LEASE_MOVED:
1189 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1213 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1190 nfs4_state_end_reclaim_reboot(clp); 1214 nfs4_state_clear_reclaim_reboot(clp);
1191 nfs4_state_start_reclaim_reboot(clp); 1215 nfs4_state_start_reclaim_reboot(clp);
1192 break; 1216 break;
1193 case -NFS4ERR_EXPIRED: 1217 case -NFS4ERR_EXPIRED:
@@ -1204,6 +1228,10 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1204 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); 1228 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1205 /* Zero session reset errors */ 1229 /* Zero session reset errors */
1206 return 0; 1230 return 0;
1231 case -EKEYEXPIRED:
1232 /* Nothing we can do */
1233 nfs4_warn_keyexpired(clp->cl_hostname);
1234 return 0;
1207 } 1235 }
1208 return error; 1236 return error;
1209} 1237}
@@ -1414,9 +1442,10 @@ static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1414 case -NFS4ERR_DELAY: 1442 case -NFS4ERR_DELAY:
1415 case -NFS4ERR_CLID_INUSE: 1443 case -NFS4ERR_CLID_INUSE:
1416 case -EAGAIN: 1444 case -EAGAIN:
1417 case -EKEYEXPIRED:
1418 break; 1445 break;
1419 1446
1447 case -EKEYEXPIRED:
1448 nfs4_warn_keyexpired(clp->cl_hostname);
1420 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery 1449 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1421 * in nfs4_exchange_id */ 1450 * in nfs4_exchange_id */
1422 default: 1451 default:
@@ -1447,6 +1476,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1447 } 1476 }
1448 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1477 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1449 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); 1478 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1479 pnfs_destroy_all_layouts(clp);
1450 } 1480 }
1451 1481
1452 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { 1482 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 08ef91291132..f313c4cce7e4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,6 +52,7 @@
52#include <linux/nfs_idmap.h> 52#include <linux/nfs_idmap.h>
53#include "nfs4_fs.h" 53#include "nfs4_fs.h"
54#include "internal.h" 54#include "internal.h"
55#include "pnfs.h"
55 56
56#define NFSDBG_FACILITY NFSDBG_XDR 57#define NFSDBG_FACILITY NFSDBG_XDR
57 58
@@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int);
310 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) 311 XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
311#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) 312#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4)
312#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) 313#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4)
314#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \
315 XDR_QUADLEN(NFS4_DEVICEID4_SIZE))
316#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \
317 1 /* layout type */ + \
318 1 /* opaque devaddr4 length */ + \
319 /* devaddr4 payload is read into page */ \
320 1 /* notification bitmap length */ + \
321 1 /* notification bitmap */)
322#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
323 encode_stateid_maxsz)
324#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
325 decode_stateid_maxsz + \
326 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
313#else /* CONFIG_NFS_V4_1 */ 327#else /* CONFIG_NFS_V4_1 */
314#define encode_sequence_maxsz 0 328#define encode_sequence_maxsz 0
315#define decode_sequence_maxsz 0 329#define decode_sequence_maxsz 0
@@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int);
699#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ 713#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \
700 decode_sequence_maxsz + \ 714 decode_sequence_maxsz + \
701 decode_reclaim_complete_maxsz) 715 decode_reclaim_complete_maxsz)
716#define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \
717 encode_sequence_maxsz +\
718 encode_getdeviceinfo_maxsz)
719#define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \
720 decode_sequence_maxsz + \
721 decode_getdeviceinfo_maxsz)
722#define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \
723 encode_sequence_maxsz + \
724 encode_putfh_maxsz + \
725 encode_layoutget_maxsz)
726#define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \
727 decode_sequence_maxsz + \
728 decode_putfh_maxsz + \
729 decode_layoutget_maxsz)
702 730
703const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 731const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
704 compound_encode_hdr_maxsz + 732 compound_encode_hdr_maxsz +
@@ -816,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
816 if (iap->ia_valid & ATTR_MODE) 844 if (iap->ia_valid & ATTR_MODE)
817 len += 4; 845 len += 4;
818 if (iap->ia_valid & ATTR_UID) { 846 if (iap->ia_valid & ATTR_UID) {
819 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); 847 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ);
820 if (owner_namelen < 0) { 848 if (owner_namelen < 0) {
821 dprintk("nfs: couldn't resolve uid %d to string\n", 849 dprintk("nfs: couldn't resolve uid %d to string\n",
822 iap->ia_uid); 850 iap->ia_uid);
@@ -828,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
828 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
829 } 857 }
830 if (iap->ia_valid & ATTR_GID) { 858 if (iap->ia_valid & ATTR_GID) {
831 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); 859 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ);
832 if (owner_grouplen < 0) { 860 if (owner_grouplen < 0) {
833 dprintk("nfs: couldn't resolve gid %d to string\n", 861 dprintk("nfs: couldn't resolve gid %d to string\n",
834 iap->ia_gid); 862 iap->ia_gid);
@@ -1385,24 +1413,35 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1385 1413
1386static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) 1414static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
1387{ 1415{
1388 uint32_t attrs[2] = { 1416 uint32_t attrs[2] = {0, 0};
1389 FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, 1417 uint32_t dircount = readdir->count >> 1;
1390 FATTR4_WORD1_MOUNTED_ON_FILEID,
1391 };
1392 __be32 *p; 1418 __be32 *p;
1393 1419
1420 if (readdir->plus) {
1421 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
1422 FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE;
1423 attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
1424 FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
1425 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
1426 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
1427 dircount >>= 1;
1428 }
1429 attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID;
1430 attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
1431 /* Switch to mounted_on_fileid if the server supports it */
1432 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1433 attrs[0] &= ~FATTR4_WORD0_FILEID;
1434 else
1435 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1436
1394 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); 1437 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1395 *p++ = cpu_to_be32(OP_READDIR); 1438 *p++ = cpu_to_be32(OP_READDIR);
1396 p = xdr_encode_hyper(p, readdir->cookie); 1439 p = xdr_encode_hyper(p, readdir->cookie);
1397 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); 1440 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
1398 *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */ 1441 *p++ = cpu_to_be32(dircount);
1399 *p++ = cpu_to_be32(readdir->count); 1442 *p++ = cpu_to_be32(readdir->count);
1400 *p++ = cpu_to_be32(2); 1443 *p++ = cpu_to_be32(2);
1401 /* Switch to mounted_on_fileid if the server supports it */ 1444
1402 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1403 attrs[0] &= ~FATTR4_WORD0_FILEID;
1404 else
1405 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1406 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); 1445 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
1407 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); 1446 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
1408 hdr->nops++; 1447 hdr->nops++;
@@ -1726,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr,
1726#endif /* CONFIG_NFS_V4_1 */ 1765#endif /* CONFIG_NFS_V4_1 */
1727} 1766}
1728 1767
1768#ifdef CONFIG_NFS_V4_1
1769static void
1770encode_getdeviceinfo(struct xdr_stream *xdr,
1771 const struct nfs4_getdeviceinfo_args *args,
1772 struct compound_hdr *hdr)
1773{
1774 __be32 *p;
1775
1776 p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE);
1777 *p++ = cpu_to_be32(OP_GETDEVICEINFO);
1778 p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data,
1779 NFS4_DEVICEID4_SIZE);
1780 *p++ = cpu_to_be32(args->pdev->layout_type);
1781 *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */
1782 *p++ = cpu_to_be32(0); /* bitmap length 0 */
1783 hdr->nops++;
1784 hdr->replen += decode_getdeviceinfo_maxsz;
1785}
1786
1787static void
1788encode_layoutget(struct xdr_stream *xdr,
1789 const struct nfs4_layoutget_args *args,
1790 struct compound_hdr *hdr)
1791{
1792 nfs4_stateid stateid;
1793 __be32 *p;
1794
1795 p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
1796 *p++ = cpu_to_be32(OP_LAYOUTGET);
1797 *p++ = cpu_to_be32(0); /* Signal layout available */
1798 *p++ = cpu_to_be32(args->type);
1799 *p++ = cpu_to_be32(args->range.iomode);
1800 p = xdr_encode_hyper(p, args->range.offset);
1801 p = xdr_encode_hyper(p, args->range.length);
1802 p = xdr_encode_hyper(p, args->minlength);
1803 pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
1804 args->ctx->state);
1805 p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
1806 *p = cpu_to_be32(args->maxcount);
1807
1808 dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
1809 __func__,
1810 args->type,
1811 args->range.iomode,
1812 (unsigned long)args->range.offset,
1813 (unsigned long)args->range.length,
1814 args->maxcount);
1815 hdr->nops++;
1816 hdr->replen += decode_layoutget_maxsz;
1817}
1818#endif /* CONFIG_NFS_V4_1 */
1819
1729/* 1820/*
1730 * END OF "GENERIC" ENCODE ROUTINES. 1821 * END OF "GENERIC" ENCODE ROUTINES.
1731 */ 1822 */
@@ -1823,7 +1914,7 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs
1823/* 1914/*
1824 * Encode RENAME request 1915 * Encode RENAME request
1825 */ 1916 */
1826static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs4_rename_arg *args) 1917static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args)
1827{ 1918{
1828 struct xdr_stream xdr; 1919 struct xdr_stream xdr;
1829 struct compound_hdr hdr = { 1920 struct compound_hdr hdr = {
@@ -2543,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p,
2543 return 0; 2634 return 0;
2544} 2635}
2545 2636
2637/*
2638 * Encode GETDEVICEINFO request
2639 */
2640static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p,
2641 struct nfs4_getdeviceinfo_args *args)
2642{
2643 struct xdr_stream xdr;
2644 struct compound_hdr hdr = {
2645 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2646 };
2647
2648 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2649 encode_compound_hdr(&xdr, req, &hdr);
2650 encode_sequence(&xdr, &args->seq_args, &hdr);
2651 encode_getdeviceinfo(&xdr, args, &hdr);
2652
2653 /* set up reply kvec. Subtract notification bitmap max size (2)
2654 * so that notification bitmap is put in xdr_buf tail */
2655 xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
2656 args->pdev->pages, args->pdev->pgbase,
2657 args->pdev->pglen);
2658
2659 encode_nops(&hdr);
2660 return 0;
2661}
2662
2663/*
2664 * Encode LAYOUTGET request
2665 */
2666static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
2667 struct nfs4_layoutget_args *args)
2668{
2669 struct xdr_stream xdr;
2670 struct compound_hdr hdr = {
2671 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2672 };
2673
2674 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
2675 encode_compound_hdr(&xdr, req, &hdr);
2676 encode_sequence(&xdr, &args->seq_args, &hdr);
2677 encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
2678 encode_layoutget(&xdr, args, &hdr);
2679 encode_nops(&hdr);
2680 return 0;
2681}
2546#endif /* CONFIG_NFS_V4_1 */ 2682#endif /* CONFIG_NFS_V4_1 */
2547 2683
2548static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2684static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -2676,7 +2812,10 @@ out_overflow:
2676static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) 2812static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
2677{ 2813{
2678 if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) { 2814 if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) {
2679 decode_attr_bitmap(xdr, bitmask); 2815 int ret;
2816 ret = decode_attr_bitmap(xdr, bitmask);
2817 if (unlikely(ret < 0))
2818 return ret;
2680 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; 2819 bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
2681 } else 2820 } else
2682 bitmask[0] = bitmask[1] = 0; 2821 bitmask[0] = bitmask[1] = 0;
@@ -2848,6 +2987,56 @@ out_overflow:
2848 return -EIO; 2987 return -EIO;
2849} 2988}
2850 2989
2990static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
2991{
2992 __be32 *p;
2993
2994 if (unlikely(bitmap[0] & (FATTR4_WORD0_RDATTR_ERROR - 1U)))
2995 return -EIO;
2996 if (likely(bitmap[0] & FATTR4_WORD0_RDATTR_ERROR)) {
2997 p = xdr_inline_decode(xdr, 4);
2998 if (unlikely(!p))
2999 goto out_overflow;
3000 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
3001 }
3002 return 0;
3003out_overflow:
3004 print_overflow_msg(__func__, xdr);
3005 return -EIO;
3006}
3007
3008static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh)
3009{
3010 __be32 *p;
3011 int len;
3012
3013 if (fh != NULL)
3014 memset(fh, 0, sizeof(*fh));
3015
3016 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEHANDLE - 1U)))
3017 return -EIO;
3018 if (likely(bitmap[0] & FATTR4_WORD0_FILEHANDLE)) {
3019 p = xdr_inline_decode(xdr, 4);
3020 if (unlikely(!p))
3021 goto out_overflow;
3022 len = be32_to_cpup(p);
3023 if (len > NFS4_FHSIZE)
3024 return -EIO;
3025 p = xdr_inline_decode(xdr, len);
3026 if (unlikely(!p))
3027 goto out_overflow;
3028 if (fh != NULL) {
3029 memcpy(fh->data, p, len);
3030 fh->size = len;
3031 }
3032 bitmap[0] &= ~FATTR4_WORD0_FILEHANDLE;
3033 }
3034 return 0;
3035out_overflow:
3036 print_overflow_msg(__func__, xdr);
3037 return -EIO;
3038}
3039
2851static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3040static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
2852{ 3041{
2853 __be32 *p; 3042 __be32 *p;
@@ -3521,6 +3710,24 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
3521 return status; 3710 return status;
3522} 3711}
3523 3712
3713static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap,
3714 struct timespec *time)
3715{
3716 int status = 0;
3717
3718 time->tv_sec = 0;
3719 time->tv_nsec = 0;
3720 if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_DELTA - 1U)))
3721 return -EIO;
3722 if (likely(bitmap[1] & FATTR4_WORD1_TIME_DELTA)) {
3723 status = decode_attr_time(xdr, time);
3724 bitmap[1] &= ~FATTR4_WORD1_TIME_DELTA;
3725 }
3726 dprintk("%s: time_delta=%ld %ld\n", __func__, (long)time->tv_sec,
3727 (long)time->tv_nsec);
3728 return status;
3729}
3730
3524static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 3731static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
3525{ 3732{
3526 int status = 0; 3733 int status = 0;
@@ -3744,29 +3951,14 @@ xdr_error:
3744 return status; 3951 return status;
3745} 3952}
3746 3953
3747static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, 3954static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3955 struct nfs_fattr *fattr, struct nfs_fh *fh,
3748 const struct nfs_server *server, int may_sleep) 3956 const struct nfs_server *server, int may_sleep)
3749{ 3957{
3750 __be32 *savep;
3751 uint32_t attrlen,
3752 bitmap[2] = {0},
3753 type;
3754 int status; 3958 int status;
3755 umode_t fmode = 0; 3959 umode_t fmode = 0;
3756 uint64_t fileid; 3960 uint64_t fileid;
3757 3961 uint32_t type;
3758 status = decode_op_hdr(xdr, OP_GETATTR);
3759 if (status < 0)
3760 goto xdr_error;
3761
3762 status = decode_attr_bitmap(xdr, bitmap);
3763 if (status < 0)
3764 goto xdr_error;
3765
3766 status = decode_attr_length(xdr, &attrlen, &savep);
3767 if (status < 0)
3768 goto xdr_error;
3769
3770 3962
3771 status = decode_attr_type(xdr, bitmap, &type); 3963 status = decode_attr_type(xdr, bitmap, &type);
3772 if (status < 0) 3964 if (status < 0)
@@ -3792,6 +3984,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3792 goto xdr_error; 3984 goto xdr_error;
3793 fattr->valid |= status; 3985 fattr->valid |= status;
3794 3986
3987 status = decode_attr_error(xdr, bitmap);
3988 if (status < 0)
3989 goto xdr_error;
3990
3991 status = decode_attr_filehandle(xdr, bitmap, fh);
3992 if (status < 0)
3993 goto xdr_error;
3994
3795 status = decode_attr_fileid(xdr, bitmap, &fattr->fileid); 3995 status = decode_attr_fileid(xdr, bitmap, &fattr->fileid);
3796 if (status < 0) 3996 if (status < 0)
3797 goto xdr_error; 3997 goto xdr_error;
@@ -3862,12 +4062,101 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
3862 fattr->valid |= status; 4062 fattr->valid |= status;
3863 } 4063 }
3864 4064
4065xdr_error:
4066 dprintk("%s: xdr returned %d\n", __func__, -status);
4067 return status;
4068}
4069
4070static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4071 struct nfs_fh *fh, const struct nfs_server *server, int may_sleep)
4072{
4073 __be32 *savep;
4074 uint32_t attrlen,
4075 bitmap[2] = {0};
4076 int status;
4077
4078 status = decode_op_hdr(xdr, OP_GETATTR);
4079 if (status < 0)
4080 goto xdr_error;
4081
4082 status = decode_attr_bitmap(xdr, bitmap);
4083 if (status < 0)
4084 goto xdr_error;
4085
4086 status = decode_attr_length(xdr, &attrlen, &savep);
4087 if (status < 0)
4088 goto xdr_error;
4089
4090 status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep);
4091 if (status < 0)
4092 goto xdr_error;
4093
3865 status = verify_attr_len(xdr, savep, attrlen); 4094 status = verify_attr_len(xdr, savep, attrlen);
3866xdr_error: 4095xdr_error:
3867 dprintk("%s: xdr returned %d\n", __func__, -status); 4096 dprintk("%s: xdr returned %d\n", __func__, -status);
3868 return status; 4097 return status;
3869} 4098}
3870 4099
4100static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr,
4101 const struct nfs_server *server, int may_sleep)
4102{
4103 return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep);
4104}
4105
4106/*
4107 * Decode potentially multiple layout types. Currently we only support
4108 * one layout driver per file system.
4109 */
4110static int decode_first_pnfs_layout_type(struct xdr_stream *xdr,
4111 uint32_t *layouttype)
4112{
4113 uint32_t *p;
4114 int num;
4115
4116 p = xdr_inline_decode(xdr, 4);
4117 if (unlikely(!p))
4118 goto out_overflow;
4119 num = be32_to_cpup(p);
4120
4121 /* pNFS is not supported by the underlying file system */
4122 if (num == 0) {
4123 *layouttype = 0;
4124 return 0;
4125 }
4126 if (num > 1)
4127 printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers "
4128 "per filesystem not supported\n", __func__);
4129
4130 /* Decode and set first layout type, move xdr->p past unused types */
4131 p = xdr_inline_decode(xdr, num * 4);
4132 if (unlikely(!p))
4133 goto out_overflow;
4134 *layouttype = be32_to_cpup(p);
4135 return 0;
4136out_overflow:
4137 print_overflow_msg(__func__, xdr);
4138 return -EIO;
4139}
4140
4141/*
4142 * The type of file system exported.
4143 * Note we must ensure that layouttype is set in any non-error case.
4144 */
4145static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap,
4146 uint32_t *layouttype)
4147{
4148 int status = 0;
4149
4150 dprintk("%s: bitmap is %x\n", __func__, bitmap[1]);
4151 if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U)))
4152 return -EIO;
4153 if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) {
4154 status = decode_first_pnfs_layout_type(xdr, layouttype);
4155 bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES;
4156 } else
4157 *layouttype = 0;
4158 return status;
4159}
3871 4160
3872static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) 4161static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
3873{ 4162{
@@ -3894,6 +4183,12 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
3894 if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) 4183 if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
3895 goto xdr_error; 4184 goto xdr_error;
3896 fsinfo->wtpref = fsinfo->wtmax; 4185 fsinfo->wtpref = fsinfo->wtmax;
4186 status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
4187 if (status != 0)
4188 goto xdr_error;
4189 status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
4190 if (status != 0)
4191 goto xdr_error;
3897 4192
3898 status = verify_attr_len(xdr, savep, attrlen); 4193 status = verify_attr_len(xdr, savep, attrlen);
3899xdr_error: 4194xdr_error:
@@ -3950,13 +4245,13 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3950 __be32 *p; 4245 __be32 *p;
3951 uint32_t namelen, type; 4246 uint32_t namelen, type;
3952 4247
3953 p = xdr_inline_decode(xdr, 32); 4248 p = xdr_inline_decode(xdr, 32); /* read 32 bytes */
3954 if (unlikely(!p)) 4249 if (unlikely(!p))
3955 goto out_overflow; 4250 goto out_overflow;
3956 p = xdr_decode_hyper(p, &offset); 4251 p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */
3957 p = xdr_decode_hyper(p, &length); 4252 p = xdr_decode_hyper(p, &length);
3958 type = be32_to_cpup(p++); 4253 type = be32_to_cpup(p++); /* 4 byte read */
3959 if (fl != NULL) { 4254 if (fl != NULL) { /* manipulate file lock */
3960 fl->fl_start = (loff_t)offset; 4255 fl->fl_start = (loff_t)offset;
3961 fl->fl_end = fl->fl_start + (loff_t)length - 1; 4256 fl->fl_end = fl->fl_start + (loff_t)length - 1;
3962 if (length == ~(uint64_t)0) 4257 if (length == ~(uint64_t)0)
@@ -3966,9 +4261,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3966 fl->fl_type = F_RDLCK; 4261 fl->fl_type = F_RDLCK;
3967 fl->fl_pid = 0; 4262 fl->fl_pid = 0;
3968 } 4263 }
3969 p = xdr_decode_hyper(p, &clientid); 4264 p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */
3970 namelen = be32_to_cpup(p); 4265 namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */
3971 p = xdr_inline_decode(xdr, namelen); 4266 p = xdr_inline_decode(xdr, namelen); /* variable size field */
3972 if (likely(p)) 4267 if (likely(p))
3973 return -NFS4ERR_DENIED; 4268 return -NFS4ERR_DENIED;
3974out_overflow: 4269out_overflow:
@@ -4200,12 +4495,9 @@ out_overflow:
4200static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) 4495static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
4201{ 4496{
4202 struct xdr_buf *rcvbuf = &req->rq_rcv_buf; 4497 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
4203 struct page *page = *rcvbuf->pages;
4204 struct kvec *iov = rcvbuf->head; 4498 struct kvec *iov = rcvbuf->head;
4205 size_t hdrlen; 4499 size_t hdrlen;
4206 u32 recvd, pglen = rcvbuf->page_len; 4500 u32 recvd, pglen = rcvbuf->page_len;
4207 __be32 *end, *entry, *p, *kaddr;
4208 unsigned int nr = 0;
4209 int status; 4501 int status;
4210 4502
4211 status = decode_op_hdr(xdr, OP_READDIR); 4503 status = decode_op_hdr(xdr, OP_READDIR);
@@ -4225,71 +4517,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
4225 pglen = recvd; 4517 pglen = recvd;
4226 xdr_read_pages(xdr, pglen); 4518 xdr_read_pages(xdr, pglen);
4227 4519
4228 BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); 4520
4229 kaddr = p = kmap_atomic(page, KM_USER0);
4230 end = p + ((pglen + readdir->pgbase) >> 2);
4231 entry = p;
4232
4233 /* Make sure the packet actually has a value_follows and EOF entry */
4234 if ((entry + 1) > end)
4235 goto short_pkt;
4236
4237 for (; *p++; nr++) {
4238 u32 len, attrlen, xlen;
4239 if (end - p < 3)
4240 goto short_pkt;
4241 dprintk("cookie = %Lu, ", *((unsigned long long *)p));
4242 p += 2; /* cookie */
4243 len = ntohl(*p++); /* filename length */
4244 if (len > NFS4_MAXNAMLEN) {
4245 dprintk("NFS: giant filename in readdir (len 0x%x)\n",
4246 len);
4247 goto err_unmap;
4248 }
4249 xlen = XDR_QUADLEN(len);
4250 if (end - p < xlen + 1)
4251 goto short_pkt;
4252 dprintk("filename = %*s\n", len, (char *)p);
4253 p += xlen;
4254 len = ntohl(*p++); /* bitmap length */
4255 if (end - p < len + 1)
4256 goto short_pkt;
4257 p += len;
4258 attrlen = XDR_QUADLEN(ntohl(*p++));
4259 if (end - p < attrlen + 2)
4260 goto short_pkt;
4261 p += attrlen; /* attributes */
4262 entry = p;
4263 }
4264 /*
4265 * Apparently some server sends responses that are a valid size, but
4266 * contain no entries, and have value_follows==0 and EOF==0. For
4267 * those, just set the EOF marker.
4268 */
4269 if (!nr && entry[1] == 0) {
4270 dprintk("NFS: readdir reply truncated!\n");
4271 entry[1] = 1;
4272 }
4273out:
4274 kunmap_atomic(kaddr, KM_USER0);
4275 return 0; 4521 return 0;
4276short_pkt:
4277 /*
4278 * When we get a short packet there are 2 possibilities. We can
4279 * return an error, or fix up the response to look like a valid
4280 * response and return what we have so far. If there are no
4281 * entries and the packet was short, then return -EIO. If there
4282 * are valid entries in the response, return them and pretend that
4283 * the call was successful, but incomplete. The caller can retry the
4284 * readdir starting at the last cookie.
4285 */
4286 dprintk("%s: short packet at entry %d\n", __func__, nr);
4287 entry[0] = entry[1] = 0;
4288 if (nr)
4289 goto out;
4290err_unmap:
4291 kunmap_atomic(kaddr, KM_USER0);
4292 return -errno_NFSERR_IO;
4293} 4522}
4294 4523
4295static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) 4524static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
@@ -4299,7 +4528,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4299 size_t hdrlen; 4528 size_t hdrlen;
4300 u32 len, recvd; 4529 u32 len, recvd;
4301 __be32 *p; 4530 __be32 *p;
4302 char *kaddr;
4303 int status; 4531 int status;
4304 4532
4305 status = decode_op_hdr(xdr, OP_READLINK); 4533 status = decode_op_hdr(xdr, OP_READLINK);
@@ -4330,9 +4558,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4330 * and null-terminate the text (the VFS expects 4558 * and null-terminate the text (the VFS expects
4331 * null-termination). 4559 * null-termination).
4332 */ 4560 */
4333 kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); 4561 xdr_terminate_string(rcvbuf, len);
4334 kaddr[len+rcvbuf->page_base] = '\0';
4335 kunmap_atomic(kaddr, KM_USER0);
4336 return 0; 4562 return 0;
4337out_overflow: 4563out_overflow:
4338 print_overflow_msg(__func__, xdr); 4564 print_overflow_msg(__func__, xdr);
@@ -4668,7 +4894,6 @@ static int decode_sequence(struct xdr_stream *xdr,
4668 struct rpc_rqst *rqstp) 4894 struct rpc_rqst *rqstp)
4669{ 4895{
4670#if defined(CONFIG_NFS_V4_1) 4896#if defined(CONFIG_NFS_V4_1)
4671 struct nfs4_slot *slot;
4672 struct nfs4_sessionid id; 4897 struct nfs4_sessionid id;
4673 u32 dummy; 4898 u32 dummy;
4674 int status; 4899 int status;
@@ -4700,15 +4925,14 @@ static int decode_sequence(struct xdr_stream *xdr,
4700 goto out_overflow; 4925 goto out_overflow;
4701 4926
4702 /* seqid */ 4927 /* seqid */
4703 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4704 dummy = be32_to_cpup(p++); 4928 dummy = be32_to_cpup(p++);
4705 if (dummy != slot->seq_nr) { 4929 if (dummy != res->sr_slot->seq_nr) {
4706 dprintk("%s Invalid sequence number\n", __func__); 4930 dprintk("%s Invalid sequence number\n", __func__);
4707 goto out_err; 4931 goto out_err;
4708 } 4932 }
4709 /* slot id */ 4933 /* slot id */
4710 dummy = be32_to_cpup(p++); 4934 dummy = be32_to_cpup(p++);
4711 if (dummy != res->sr_slotid) { 4935 if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) {
4712 dprintk("%s Invalid slot id\n", __func__); 4936 dprintk("%s Invalid slot id\n", __func__);
4713 goto out_err; 4937 goto out_err;
4714 } 4938 }
@@ -4731,6 +4955,134 @@ out_overflow:
4731#endif /* CONFIG_NFS_V4_1 */ 4955#endif /* CONFIG_NFS_V4_1 */
4732} 4956}
4733 4957
4958#if defined(CONFIG_NFS_V4_1)
4959
4960static int decode_getdeviceinfo(struct xdr_stream *xdr,
4961 struct pnfs_device *pdev)
4962{
4963 __be32 *p;
4964 uint32_t len, type;
4965 int status;
4966
4967 status = decode_op_hdr(xdr, OP_GETDEVICEINFO);
4968 if (status) {
4969 if (status == -ETOOSMALL) {
4970 p = xdr_inline_decode(xdr, 4);
4971 if (unlikely(!p))
4972 goto out_overflow;
4973 pdev->mincount = be32_to_cpup(p);
4974 dprintk("%s: Min count too small. mincnt = %u\n",
4975 __func__, pdev->mincount);
4976 }
4977 return status;
4978 }
4979
4980 p = xdr_inline_decode(xdr, 8);
4981 if (unlikely(!p))
4982 goto out_overflow;
4983 type = be32_to_cpup(p++);
4984 if (type != pdev->layout_type) {
4985 dprintk("%s: layout mismatch req: %u pdev: %u\n",
4986 __func__, pdev->layout_type, type);
4987 return -EINVAL;
4988 }
4989 /*
4990 * Get the length of the opaque device_addr4. xdr_read_pages places
4991 * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages)
4992 * and places the remaining xdr data in xdr_buf->tail
4993 */
4994 pdev->mincount = be32_to_cpup(p);
4995 xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
4996
4997 /* Parse notification bitmap, verifying that it is zero. */
4998 p = xdr_inline_decode(xdr, 4);
4999 if (unlikely(!p))
5000 goto out_overflow;
5001 len = be32_to_cpup(p);
5002 if (len) {
5003 int i;
5004
5005 p = xdr_inline_decode(xdr, 4 * len);
5006 if (unlikely(!p))
5007 goto out_overflow;
5008 for (i = 0; i < len; i++, p++) {
5009 if (be32_to_cpup(p)) {
5010 dprintk("%s: notifications not supported\n",
5011 __func__);
5012 return -EIO;
5013 }
5014 }
5015 }
5016 return 0;
5017out_overflow:
5018 print_overflow_msg(__func__, xdr);
5019 return -EIO;
5020}
5021
5022static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
5023 struct nfs4_layoutget_res *res)
5024{
5025 __be32 *p;
5026 int status;
5027 u32 layout_count;
5028
5029 status = decode_op_hdr(xdr, OP_LAYOUTGET);
5030 if (status)
5031 return status;
5032 p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE);
5033 if (unlikely(!p))
5034 goto out_overflow;
5035 res->return_on_close = be32_to_cpup(p++);
5036 p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
5037 layout_count = be32_to_cpup(p);
5038 if (!layout_count) {
5039 dprintk("%s: server responded with empty layout array\n",
5040 __func__);
5041 return -EINVAL;
5042 }
5043
5044 p = xdr_inline_decode(xdr, 24);
5045 if (unlikely(!p))
5046 goto out_overflow;
5047 p = xdr_decode_hyper(p, &res->range.offset);
5048 p = xdr_decode_hyper(p, &res->range.length);
5049 res->range.iomode = be32_to_cpup(p++);
5050 res->type = be32_to_cpup(p++);
5051
5052 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
5053 if (unlikely(status))
5054 return status;
5055
5056 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
5057 __func__,
5058 (unsigned long)res->range.offset,
5059 (unsigned long)res->range.length,
5060 res->range.iomode,
5061 res->type,
5062 res->layout.len);
5063
5064 /* nfs4_proc_layoutget allocated a single page */
5065 if (res->layout.len > PAGE_SIZE)
5066 return -ENOMEM;
5067 memcpy(res->layout.buf, p, res->layout.len);
5068
5069 if (layout_count > 1) {
5070 /* We only handle a length one array at the moment. Any
5071 * further entries are just ignored. Note that this means
5072 * the client may see a response that is less than the
5073 * minimum it requested.
5074 */
5075 dprintk("%s: server responded with %d layouts, dropping tail\n",
5076 __func__, layout_count);
5077 }
5078
5079 return 0;
5080out_overflow:
5081 print_overflow_msg(__func__, xdr);
5082 return -EIO;
5083}
5084#endif /* CONFIG_NFS_V4_1 */
5085
4734/* 5086/*
4735 * END OF "GENERIC" DECODE ROUTINES. 5087 * END OF "GENERIC" DECODE ROUTINES.
4736 */ 5088 */
@@ -4873,7 +5225,7 @@ out:
4873/* 5225/*
4874 * Decode RENAME response 5226 * Decode RENAME response
4875 */ 5227 */
4876static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_rename_res *res) 5228static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res)
4877{ 5229{
4878 struct xdr_stream xdr; 5230 struct xdr_stream xdr;
4879 struct compound_hdr hdr; 5231 struct compound_hdr hdr;
@@ -5758,25 +6110,84 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p,
5758 status = decode_reclaim_complete(&xdr, (void *)NULL); 6110 status = decode_reclaim_complete(&xdr, (void *)NULL);
5759 return status; 6111 return status;
5760} 6112}
6113
6114/*
6115 * Decode GETDEVICEINFO response
6116 */
6117static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p,
6118 struct nfs4_getdeviceinfo_res *res)
6119{
6120 struct xdr_stream xdr;
6121 struct compound_hdr hdr;
6122 int status;
6123
6124 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6125 status = decode_compound_hdr(&xdr, &hdr);
6126 if (status != 0)
6127 goto out;
6128 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6129 if (status != 0)
6130 goto out;
6131 status = decode_getdeviceinfo(&xdr, res->pdev);
6132out:
6133 return status;
6134}
6135
6136/*
6137 * Decode LAYOUTGET response
6138 */
6139static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p,
6140 struct nfs4_layoutget_res *res)
6141{
6142 struct xdr_stream xdr;
6143 struct compound_hdr hdr;
6144 int status;
6145
6146 xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
6147 status = decode_compound_hdr(&xdr, &hdr);
6148 if (status)
6149 goto out;
6150 status = decode_sequence(&xdr, &res->seq_res, rqstp);
6151 if (status)
6152 goto out;
6153 status = decode_putfh(&xdr);
6154 if (status)
6155 goto out;
6156 status = decode_layoutget(&xdr, rqstp, res);
6157out:
6158 return status;
6159}
5761#endif /* CONFIG_NFS_V4_1 */ 6160#endif /* CONFIG_NFS_V4_1 */
5762 6161
5763__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) 6162__be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6163 struct nfs_server *server, int plus)
5764{ 6164{
5765 uint32_t bitmap[2] = {0}; 6165 uint32_t bitmap[2] = {0};
5766 uint32_t len; 6166 uint32_t len;
5767 6167 __be32 *p = xdr_inline_decode(xdr, 4);
5768 if (!*p++) { 6168 if (unlikely(!p))
5769 if (!*p) 6169 goto out_overflow;
6170 if (!ntohl(*p++)) {
6171 p = xdr_inline_decode(xdr, 4);
6172 if (unlikely(!p))
6173 goto out_overflow;
6174 if (!ntohl(*p++))
5770 return ERR_PTR(-EAGAIN); 6175 return ERR_PTR(-EAGAIN);
5771 entry->eof = 1; 6176 entry->eof = 1;
5772 return ERR_PTR(-EBADCOOKIE); 6177 return ERR_PTR(-EBADCOOKIE);
5773 } 6178 }
5774 6179
6180 p = xdr_inline_decode(xdr, 12);
6181 if (unlikely(!p))
6182 goto out_overflow;
5775 entry->prev_cookie = entry->cookie; 6183 entry->prev_cookie = entry->cookie;
5776 p = xdr_decode_hyper(p, &entry->cookie); 6184 p = xdr_decode_hyper(p, &entry->cookie);
5777 entry->len = ntohl(*p++); 6185 entry->len = ntohl(*p++);
6186
6187 p = xdr_inline_decode(xdr, entry->len);
6188 if (unlikely(!p))
6189 goto out_overflow;
5778 entry->name = (const char *) p; 6190 entry->name = (const char *) p;
5779 p += XDR_QUADLEN(entry->len);
5780 6191
5781 /* 6192 /*
5782 * In case the server doesn't return an inode number, 6193 * In case the server doesn't return an inode number,
@@ -5784,32 +6195,33 @@ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus)
5784 * since glibc seems to choke on it...) 6195 * since glibc seems to choke on it...)
5785 */ 6196 */
5786 entry->ino = 1; 6197 entry->ino = 1;
6198 entry->fattr->valid = 0;
5787 6199
5788 len = ntohl(*p++); /* bitmap length */ 6200 if (decode_attr_bitmap(xdr, bitmap) < 0)
5789 if (len-- > 0) { 6201 goto out_overflow;
5790 bitmap[0] = ntohl(*p++); 6202
5791 if (len-- > 0) { 6203 if (decode_attr_length(xdr, &len, &p) < 0)
5792 bitmap[1] = ntohl(*p++); 6204 goto out_overflow;
5793 p += len; 6205
5794 } 6206 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0)
5795 } 6207 goto out_overflow;
5796 len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ 6208 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
5797 if (len > 0) { 6209 entry->ino = entry->fattr->fileid;
5798 if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { 6210
5799 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; 6211 if (verify_attr_len(xdr, p, len) < 0)
5800 /* Ignore the return value of rdattr_error for now */ 6212 goto out_overflow;
5801 p++; 6213
5802 len--; 6214 p = xdr_inline_peek(xdr, 8);
5803 } 6215 if (p != NULL)
5804 if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) 6216 entry->eof = !p[0] && p[1];
5805 xdr_decode_hyper(p, &entry->ino); 6217 else
5806 else if (bitmap[0] == FATTR4_WORD0_FILEID) 6218 entry->eof = 0;
5807 xdr_decode_hyper(p, &entry->ino);
5808 p += len;
5809 }
5810 6219
5811 entry->eof = !p[0] && p[1];
5812 return p; 6220 return p;
6221
6222out_overflow:
6223 print_overflow_msg(__func__, xdr);
6224 return ERR_PTR(-EIO);
5813} 6225}
5814 6226
5815/* 6227/*
@@ -5936,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = {
5936 PROC(SEQUENCE, enc_sequence, dec_sequence), 6348 PROC(SEQUENCE, enc_sequence, dec_sequence),
5937 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), 6349 PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time),
5938 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6350 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6351 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6352 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
5939#endif /* CONFIG_NFS_V4_1 */ 6353#endif /* CONFIG_NFS_V4_1 */
5940}; 6354};
5941 6355
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index df101d9f546a..903908a20023 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -3,9 +3,10 @@
3 * 3 *
4 * Allow an NFS filesystem to be mounted as root. The way this works is: 4 * Allow an NFS filesystem to be mounted as root. The way this works is:
5 * (1) Use the IP autoconfig mechanism to set local IP addresses and routes. 5 * (1) Use the IP autoconfig mechanism to set local IP addresses and routes.
6 * (2) Handle RPC negotiation with the system which replied to RARP or 6 * (2) Construct the device string and the options string using DHCP
7 * was reported as a boot server by BOOTP or manually. 7 * option 17 and/or kernel command line options.
8 * (3) The actual mounting is done later, when init() is running. 8 * (3) When mount_root() sets up the root file system, pass these strings
9 * to the NFS client's regular mount interface via sys_mount().
9 * 10 *
10 * 11 *
11 * Changes: 12 * Changes:
@@ -65,470 +66,245 @@
65 * Hua Qin : Support for mounting root file system via 66 * Hua Qin : Support for mounting root file system via
66 * NFS over TCP. 67 * NFS over TCP.
67 * Fabian Frederick: Option parser rebuilt (using parser lib) 68 * Fabian Frederick: Option parser rebuilt (using parser lib)
68*/ 69 * Chuck Lever : Use super.c's text-based mount option parsing
70 * Chuck Lever : Add "nfsrootdebug".
71 */
69 72
70#include <linux/types.h> 73#include <linux/types.h>
71#include <linux/string.h> 74#include <linux/string.h>
72#include <linux/kernel.h>
73#include <linux/time.h>
74#include <linux/fs.h>
75#include <linux/init.h> 75#include <linux/init.h>
76#include <linux/sunrpc/clnt.h>
77#include <linux/sunrpc/xprtsock.h>
78#include <linux/nfs.h> 76#include <linux/nfs.h>
79#include <linux/nfs_fs.h> 77#include <linux/nfs_fs.h>
80#include <linux/nfs_mount.h>
81#include <linux/in.h>
82#include <linux/major.h>
83#include <linux/utsname.h> 78#include <linux/utsname.h>
84#include <linux/inet.h>
85#include <linux/root_dev.h> 79#include <linux/root_dev.h>
86#include <net/ipconfig.h> 80#include <net/ipconfig.h>
87#include <linux/parser.h>
88 81
89#include "internal.h" 82#include "internal.h"
90 83
91/* Define this to allow debugging output */
92#undef NFSROOT_DEBUG
93#define NFSDBG_FACILITY NFSDBG_ROOT 84#define NFSDBG_FACILITY NFSDBG_ROOT
94 85
95/* Default port to use if server is not running a portmapper */
96#define NFS_MNT_PORT 627
97
98/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
99#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
100 88
101/* Parameters passed from the kernel command line */ 89/* Parameters passed from the kernel command line */
102static char nfs_root_name[256] __initdata = ""; 90static char nfs_root_parms[256] __initdata = "";
91
92/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = "";
103 94
104/* Address of NFS server */ 95/* Address of NFS server */
105static __be32 servaddr __initdata = 0; 96static __be32 servaddr __initdata = htonl(INADDR_NONE);
106 97
107/* Name of directory to mount */ 98/* Name of directory to mount */
108static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = { 0, }; 99static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
109
110/* NFS-related data */
111static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */
112static int nfs_port __initdata = 0; /* Port to connect to for NFS */
113static int mount_port __initdata = 0; /* Mount daemon port number */
114
115
116/***************************************************************************
117
118 Parsing of options
119
120 ***************************************************************************/
121
122enum {
123 /* Options that take integer arguments */
124 Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin,
125 Opt_acregmax, Opt_acdirmin, Opt_acdirmax,
126 /* Options that take no arguments */
127 Opt_soft, Opt_hard, Opt_intr,
128 Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac,
129 Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp,
130 Opt_acl, Opt_noacl,
131 /* Error token */
132 Opt_err
133};
134
135static const match_table_t tokens __initconst = {
136 {Opt_port, "port=%u"},
137 {Opt_rsize, "rsize=%u"},
138 {Opt_wsize, "wsize=%u"},
139 {Opt_timeo, "timeo=%u"},
140 {Opt_retrans, "retrans=%u"},
141 {Opt_acregmin, "acregmin=%u"},
142 {Opt_acregmax, "acregmax=%u"},
143 {Opt_acdirmin, "acdirmin=%u"},
144 {Opt_acdirmax, "acdirmax=%u"},
145 {Opt_soft, "soft"},
146 {Opt_hard, "hard"},
147 {Opt_intr, "intr"},
148 {Opt_nointr, "nointr"},
149 {Opt_posix, "posix"},
150 {Opt_noposix, "noposix"},
151 {Opt_cto, "cto"},
152 {Opt_nocto, "nocto"},
153 {Opt_ac, "ac"},
154 {Opt_noac, "noac"},
155 {Opt_lock, "lock"},
156 {Opt_nolock, "nolock"},
157 {Opt_v2, "nfsvers=2"},
158 {Opt_v2, "v2"},
159 {Opt_v3, "nfsvers=3"},
160 {Opt_v3, "v3"},
161 {Opt_udp, "proto=udp"},
162 {Opt_udp, "udp"},
163 {Opt_tcp, "proto=tcp"},
164 {Opt_tcp, "tcp"},
165 {Opt_acl, "acl"},
166 {Opt_noacl, "noacl"},
167 {Opt_err, NULL}
168
169};
170 100
101/* server:export path string passed to super.c */
102static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
103
104#ifdef RPC_DEBUG
171/* 105/*
172 * Parse option string. 106 * When the "nfsrootdebug" kernel command line option is specified,
107 * enable debugging messages for NFSROOT.
173 */ 108 */
174 109static int __init nfs_root_debug(char *__unused)
175static int __init root_nfs_parse(char *name, char *buf)
176{ 110{
177 111 nfs_debug |= NFSDBG_ROOT | NFSDBG_MOUNT;
178 char *p;
179 substring_t args[MAX_OPT_ARGS];
180 int option;
181
182 if (!name)
183 return 1;
184
185 /* Set the NFS remote path */
186 p = strsep(&name, ",");
187 if (p[0] != '\0' && strcmp(p, "default") != 0)
188 strlcpy(buf, p, NFS_MAXPATHLEN);
189
190 while ((p = strsep (&name, ",")) != NULL) {
191 int token;
192 if (!*p)
193 continue;
194 token = match_token(p, tokens, args);
195
196 /* %u tokens only. Beware if you add new tokens! */
197 if (token < Opt_soft && match_int(&args[0], &option))
198 return 0;
199 switch (token) {
200 case Opt_port:
201 nfs_port = option;
202 break;
203 case Opt_rsize:
204 nfs_data.rsize = option;
205 break;
206 case Opt_wsize:
207 nfs_data.wsize = option;
208 break;
209 case Opt_timeo:
210 nfs_data.timeo = option;
211 break;
212 case Opt_retrans:
213 nfs_data.retrans = option;
214 break;
215 case Opt_acregmin:
216 nfs_data.acregmin = option;
217 break;
218 case Opt_acregmax:
219 nfs_data.acregmax = option;
220 break;
221 case Opt_acdirmin:
222 nfs_data.acdirmin = option;
223 break;
224 case Opt_acdirmax:
225 nfs_data.acdirmax = option;
226 break;
227 case Opt_soft:
228 nfs_data.flags |= NFS_MOUNT_SOFT;
229 break;
230 case Opt_hard:
231 nfs_data.flags &= ~NFS_MOUNT_SOFT;
232 break;
233 case Opt_intr:
234 case Opt_nointr:
235 break;
236 case Opt_posix:
237 nfs_data.flags |= NFS_MOUNT_POSIX;
238 break;
239 case Opt_noposix:
240 nfs_data.flags &= ~NFS_MOUNT_POSIX;
241 break;
242 case Opt_cto:
243 nfs_data.flags &= ~NFS_MOUNT_NOCTO;
244 break;
245 case Opt_nocto:
246 nfs_data.flags |= NFS_MOUNT_NOCTO;
247 break;
248 case Opt_ac:
249 nfs_data.flags &= ~NFS_MOUNT_NOAC;
250 break;
251 case Opt_noac:
252 nfs_data.flags |= NFS_MOUNT_NOAC;
253 break;
254 case Opt_lock:
255 nfs_data.flags &= ~NFS_MOUNT_NONLM;
256 break;
257 case Opt_nolock:
258 nfs_data.flags |= NFS_MOUNT_NONLM;
259 break;
260 case Opt_v2:
261 nfs_data.flags &= ~NFS_MOUNT_VER3;
262 break;
263 case Opt_v3:
264 nfs_data.flags |= NFS_MOUNT_VER3;
265 break;
266 case Opt_udp:
267 nfs_data.flags &= ~NFS_MOUNT_TCP;
268 break;
269 case Opt_tcp:
270 nfs_data.flags |= NFS_MOUNT_TCP;
271 break;
272 case Opt_acl:
273 nfs_data.flags &= ~NFS_MOUNT_NOACL;
274 break;
275 case Opt_noacl:
276 nfs_data.flags |= NFS_MOUNT_NOACL;
277 break;
278 default:
279 printk(KERN_WARNING "Root-NFS: unknown "
280 "option: %s\n", p);
281 return 0;
282 }
283 }
284
285 return 1; 112 return 1;
286} 113}
287 114
115__setup("nfsrootdebug", nfs_root_debug);
116#endif
117
288/* 118/*
289 * Prepare the NFS data structure and parse all options. 119 * Parse NFS server and directory information passed on the kernel
120 * command line.
121 *
122 * nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
123 *
124 * If there is a "%s" token in the <root-dir> string, it is replaced
125 * by the ASCII-representation of the client's IP address.
290 */ 126 */
291static int __init root_nfs_name(char *name) 127static int __init nfs_root_setup(char *line)
292{ 128{
293 static char buf[NFS_MAXPATHLEN] __initdata; 129 ROOT_DEV = Root_NFS;
294 char *cp; 130
295 131 if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) {
296 /* Set some default values */ 132 strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms));
297 memset(&nfs_data, 0, sizeof(nfs_data)); 133 } else {
298 nfs_port = -1; 134 size_t n = strlen(line) + sizeof(NFS_ROOT) - 1;
299 nfs_data.version = NFS_MOUNT_VERSION; 135 if (n >= sizeof(nfs_root_parms))
300 nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ 136 line[sizeof(nfs_root_parms) - sizeof(NFS_ROOT) - 2] = '\0';
301 nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; 137 sprintf(nfs_root_parms, NFS_ROOT, line);
302 nfs_data.wsize = NFS_DEF_FILE_IO_SIZE;
303 nfs_data.acregmin = NFS_DEF_ACREGMIN;
304 nfs_data.acregmax = NFS_DEF_ACREGMAX;
305 nfs_data.acdirmin = NFS_DEF_ACDIRMIN;
306 nfs_data.acdirmax = NFS_DEF_ACDIRMAX;
307 strcpy(buf, NFS_ROOT);
308
309 /* Process options received from the remote server */
310 root_nfs_parse(root_server_path, buf);
311
312 /* Override them by options set on kernel command-line */
313 root_nfs_parse(name, buf);
314
315 cp = utsname()->nodename;
316 if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
317 printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
318 return -1;
319 } 138 }
320 sprintf(nfs_export_path, buf, cp); 139
140 /*
141 * Extract the IP address of the NFS server containing our
142 * root file system, if one was specified.
143 *
144 * Note: root_nfs_parse_addr() removes the server-ip from
145 * nfs_root_parms, if it exists.
146 */
147 root_server_addr = root_nfs_parse_addr(nfs_root_parms);
321 148
322 return 1; 149 return 1;
323} 150}
324 151
152__setup("nfsroot=", nfs_root_setup);
325 153
326/* 154static int __init root_nfs_copy(char *dest, const char *src,
327 * Get NFS server address. 155 const size_t destlen)
328 */
329static int __init root_nfs_addr(void)
330{ 156{
331 if ((servaddr = root_server_addr) == htonl(INADDR_NONE)) { 157 if (strlcpy(dest, src, destlen) > destlen)
332 printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n");
333 return -1; 158 return -1;
334 } 159 return 0;
160}
335 161
336 snprintf(nfs_data.hostname, sizeof(nfs_data.hostname), 162static int __init root_nfs_cat(char *dest, const char *src,
337 "%pI4", &servaddr); 163 const size_t destlen)
164{
165 if (strlcat(dest, src, destlen) > destlen)
166 return -1;
338 return 0; 167 return 0;
339} 168}
340 169
341/* 170/*
342 * Tell the user what's going on. 171 * Parse out root export path and mount options from
172 * passed-in string @incoming.
173 *
174 * Copy the export path into @exppath.
343 */ 175 */
344#ifdef NFSROOT_DEBUG 176static int __init root_nfs_parse_options(char *incoming, char *exppath,
345static void __init root_nfs_print(void) 177 const size_t exppathlen)
346{ 178{
347 printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", 179 char *p;
348 nfs_export_path, nfs_data.hostname);
349 printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n",
350 nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans);
351 printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n",
352 nfs_data.acregmin, nfs_data.acregmax,
353 nfs_data.acdirmin, nfs_data.acdirmax);
354 printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n",
355 nfs_port, mount_port, nfs_data.flags);
356}
357#endif
358
359 180
360static int __init root_nfs_init(void) 181 /*
361{ 182 * Set the NFS remote path
362#ifdef NFSROOT_DEBUG 183 */
363 nfs_debug |= NFSDBG_ROOT; 184 p = strsep(&incoming, ",");
364#endif 185 if (*p != '\0' && strcmp(p, "default") != 0)
186 if (root_nfs_copy(exppath, p, exppathlen))
187 return -1;
365 188
366 /* 189 /*
367 * Decode the root directory path name and NFS options from 190 * @incoming now points to the rest of the string; if it
368 * the kernel command line. This has to go here in order to 191 * contains something, append it to our root options buffer
369 * be able to use the client IP address for the remote root
370 * directory (necessary for pure RARP booting).
371 */ 192 */
372 if (root_nfs_name(nfs_root_name) < 0 || 193 if (incoming != NULL && *incoming != '\0')
373 root_nfs_addr() < 0) 194 if (root_nfs_cat(nfs_root_options, incoming,
374 return -1; 195 sizeof(nfs_root_options)))
196 return -1;
375 197
376#ifdef NFSROOT_DEBUG 198 /*
377 root_nfs_print(); 199 * Possibly prepare for more options to be appended
378#endif 200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
379 206
380 return 0; 207 return 0;
381} 208}
382 209
383
384/* 210/*
385 * Parse NFS server and directory information passed on the kernel 211 * Decode the export directory path name and NFS options from
386 * command line. 212 * the kernel command line. This has to be done late in order to
213 * use a dynamically acquired client IP address for the remote
214 * root directory path.
215 *
216 * Returns zero if successful; otherwise -1 is returned.
387 */ 217 */
388static int __init nfs_root_setup(char *line) 218static int __init root_nfs_data(char *cmdline)
389{ 219{
390 ROOT_DEV = Root_NFS; 220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
391 if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { 221 int len, retval = -1;
392 strlcpy(nfs_root_name, line, sizeof(nfs_root_name)); 222 char *tmp = NULL;
393 } else { 223 const size_t tmplen = sizeof(nfs_export_path);
394 int n = strlen(line) + sizeof(NFS_ROOT) - 1; 224
395 if (n >= sizeof(nfs_root_name)) 225 tmp = kzalloc(tmplen, GFP_KERNEL);
396 line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0'; 226 if (tmp == NULL)
397 sprintf(nfs_root_name, NFS_ROOT, line); 227 goto out_nomem;
228 strcpy(tmp, NFS_ROOT);
229
230 if (root_server_path[0] != '\0') {
231 dprintk("Root-NFS: DHCPv4 option 17: %s\n",
232 root_server_path);
233 if (root_nfs_parse_options(root_server_path, tmp, tmplen))
234 goto out_optionstoolong;
398 } 235 }
399 root_server_addr = root_nfs_parse_addr(nfs_root_name);
400 return 1;
401}
402
403__setup("nfsroot=", nfs_root_setup);
404
405/***************************************************************************
406 236
407 Routines to actually mount the root directory 237 if (cmdline[0] != '\0') {
238 dprintk("Root-NFS: nfsroot=%s\n", cmdline);
239 if (root_nfs_parse_options(cmdline, tmp, tmplen))
240 goto out_optionstoolong;
241 }
408 242
409 ***************************************************************************/ 243 /*
244 * Append mandatory options for nfsroot so they override
245 * what has come before
246 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4",
248 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option,
250 sizeof(nfs_root_options)))
251 goto out_optionstoolong;
410 252
411/* 253 /*
412 * Construct sockaddr_in from address and port number. 254 * Set up nfs_root_device. For NFS mounts, this looks like
413 */ 255 *
414static inline void 256 * server:/path
415set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) 257 *
416{ 258 * At this point, utsname()->nodename contains our local
417 sin->sin_family = AF_INET; 259 * IP address or hostname, set by ipconfig. If "%s" exists
418 sin->sin_addr.s_addr = addr; 260 * in tmp, substitute the nodename, then shovel the whole
419 sin->sin_port = port; 261 * mess into nfs_root_device.
420} 262 */
263 len = snprintf(nfs_export_path, sizeof(nfs_export_path),
264 tmp, utsname()->nodename);
265 if (len > (int)sizeof(nfs_export_path))
266 goto out_devnametoolong;
267 len = snprintf(nfs_root_device, sizeof(nfs_root_device),
268 "%pI4:%s", &servaddr, nfs_export_path);
269 if (len > (int)sizeof(nfs_root_device))
270 goto out_devnametoolong;
421 271
422/* 272 retval = 0;
423 * Query server portmapper for the port of a daemon program.
424 */
425static int __init root_nfs_getport(int program, int version, int proto)
426{
427 struct sockaddr_in sin;
428 273
429 printk(KERN_NOTICE "Looking up port of RPC %d/%d on %pI4\n", 274out:
430 program, version, &servaddr); 275 kfree(tmp);
431 set_sockaddr(&sin, servaddr, 0); 276 return retval;
432 return rpcb_getport_sync(&sin, program, version, proto); 277out_nomem:
278 printk(KERN_ERR "Root-NFS: could not allocate memory\n");
279 goto out;
280out_optionstoolong:
281 printk(KERN_ERR "Root-NFS: mount options string too long\n");
282 goto out;
283out_devnametoolong:
284 printk(KERN_ERR "Root-NFS: root device name too long.\n");
285 goto out;
433} 286}
434 287
435 288/**
436/* 289 * nfs_root_data - Return prepared 'data' for NFSROOT mount
437 * Use portmapper to find mountd and nfsd port numbers if not overriden 290 * @root_device: OUT: address of string containing NFSROOT device
438 * by the user. Use defaults if portmapper is not available. 291 * @root_data: OUT: address of string containing NFSROOT mount options
439 * XXX: Is there any nfs server with no portmapper? 292 *
293 * Returns zero and sets @root_device and @root_data if successful,
294 * otherwise -1 is returned.
440 */ 295 */
441static int __init root_nfs_ports(void) 296int __init nfs_root_data(char **root_device, char **root_data)
442{ 297{
443 int port; 298 servaddr = root_server_addr;
444 int nfsd_ver, mountd_ver; 299 if (servaddr == htonl(INADDR_NONE)) {
445 int nfsd_port, mountd_port; 300 printk(KERN_ERR "Root-NFS: no NFS server address\n");
446 int proto; 301 return -1;
447
448 if (nfs_data.flags & NFS_MOUNT_VER3) {
449 nfsd_ver = NFS3_VERSION;
450 mountd_ver = NFS_MNT3_VERSION;
451 nfsd_port = NFS_PORT;
452 mountd_port = NFS_MNT_PORT;
453 } else {
454 nfsd_ver = NFS2_VERSION;
455 mountd_ver = NFS_MNT_VERSION;
456 nfsd_port = NFS_PORT;
457 mountd_port = NFS_MNT_PORT;
458 }
459
460 proto = (nfs_data.flags & NFS_MOUNT_TCP) ? IPPROTO_TCP : IPPROTO_UDP;
461
462 if (nfs_port < 0) {
463 if ((port = root_nfs_getport(NFS_PROGRAM, nfsd_ver, proto)) < 0) {
464 printk(KERN_ERR "Root-NFS: Unable to get nfsd port "
465 "number from server, using default\n");
466 port = nfsd_port;
467 }
468 nfs_port = port;
469 dprintk("Root-NFS: Portmapper on server returned %d "
470 "as nfsd port\n", port);
471 } 302 }
472 303
473 if ((port = root_nfs_getport(NFS_MNT_PROGRAM, mountd_ver, proto)) < 0) { 304 if (root_nfs_data(nfs_root_parms) < 0)
474 printk(KERN_ERR "Root-NFS: Unable to get mountd port " 305 return -1;
475 "number from server, using default\n");
476 port = mountd_port;
477 }
478 mount_port = port;
479 dprintk("Root-NFS: mountd port is %d\n", port);
480 306
307 *root_device = nfs_root_device;
308 *root_data = nfs_root_options;
481 return 0; 309 return 0;
482} 310}
483
484
485/*
486 * Get a file handle from the server for the directory which is to be
487 * mounted.
488 */
489static int __init root_nfs_get_handle(void)
490{
491 struct sockaddr_in sin;
492 unsigned int auth_flav_len = 0;
493 struct nfs_mount_request request = {
494 .sap = (struct sockaddr *)&sin,
495 .salen = sizeof(sin),
496 .dirpath = nfs_export_path,
497 .version = (nfs_data.flags & NFS_MOUNT_VER3) ?
498 NFS_MNT3_VERSION : NFS_MNT_VERSION,
499 .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ?
500 XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP,
501 .auth_flav_len = &auth_flav_len,
502 };
503 int status = -ENOMEM;
504
505 request.fh = nfs_alloc_fhandle();
506 if (!request.fh)
507 goto out;
508 set_sockaddr(&sin, servaddr, htons(mount_port));
509 status = nfs_mount(&request);
510 if (status < 0)
511 printk(KERN_ERR "Root-NFS: Server returned error %d "
512 "while mounting %s\n", status, nfs_export_path);
513 else {
514 nfs_data.root.size = request.fh->size;
515 memcpy(&nfs_data.root.data, request.fh->data, request.fh->size);
516 }
517 nfs_free_fhandle(request.fh);
518out:
519 return status;
520}
521
522/*
523 * Get the NFS port numbers and file handle, and return the prepared 'data'
524 * argument for mount() if everything went OK. Return NULL otherwise.
525 */
526void * __init nfs_root_data(void)
527{
528 if (root_nfs_init() < 0
529 || root_nfs_ports() < 0
530 || root_nfs_get_handle() < 0)
531 return NULL;
532 set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, htons(nfs_port));
533 return (void*)&nfs_data;
534}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
new file mode 100644
index 000000000000..db773428f95f
--- /dev/null
+++ b/fs/nfs/pnfs.c
@@ -0,0 +1,783 @@
1/*
2 * pNFS functions to call and manage layout drivers.
3 *
4 * Copyright (c) 2002 [year of first publication]
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#include <linux/nfs_fs.h>
31#include "internal.h"
32#include "pnfs.h"
33
34#define NFSDBG_FACILITY NFSDBG_PNFS
35
36/* Locking:
37 *
38 * pnfs_spinlock:
39 * protects pnfs_modules_tbl.
40 */
41static DEFINE_SPINLOCK(pnfs_spinlock);
42
43/*
44 * pnfs_modules_tbl holds all pnfs modules
45 */
46static LIST_HEAD(pnfs_modules_tbl);
47
48/* Return the registered pnfs layout driver module matching given id */
49static struct pnfs_layoutdriver_type *
50find_pnfs_driver_locked(u32 id)
51{
52 struct pnfs_layoutdriver_type *local;
53
54 list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid)
55 if (local->id == id)
56 goto out;
57 local = NULL;
58out:
59 dprintk("%s: Searching for id %u, found %p\n", __func__, id, local);
60 return local;
61}
62
63static struct pnfs_layoutdriver_type *
64find_pnfs_driver(u32 id)
65{
66 struct pnfs_layoutdriver_type *local;
67
68 spin_lock(&pnfs_spinlock);
69 local = find_pnfs_driver_locked(id);
70 spin_unlock(&pnfs_spinlock);
71 return local;
72}
73
74void
75unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{
77 if (nfss->pnfs_curr_ld) {
78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
79 module_put(nfss->pnfs_curr_ld->owner);
80 }
81 nfss->pnfs_curr_ld = NULL;
82}
83
84/*
85 * Try to set the server's pnfs module to the pnfs layout type specified by id.
86 * Currently only one pNFS layout driver per filesystem is supported.
87 *
88 * @id layout type. Zero (illegal layout type) indicates pNFS not in use.
89 */
90void
91set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
92{
93 struct pnfs_layoutdriver_type *ld_type = NULL;
94
95 if (id == 0)
96 goto out_no_driver;
97 if (!(server->nfs_client->cl_exchange_flags &
98 (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) {
99 printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__,
100 id, server->nfs_client->cl_exchange_flags);
101 goto out_no_driver;
102 }
103 ld_type = find_pnfs_driver(id);
104 if (!ld_type) {
105 request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id);
106 ld_type = find_pnfs_driver(id);
107 if (!ld_type) {
108 dprintk("%s: No pNFS module found for %u.\n",
109 __func__, id);
110 goto out_no_driver;
111 }
112 }
113 if (!try_module_get(ld_type->owner)) {
114 dprintk("%s: Could not grab reference on module\n", __func__);
115 goto out_no_driver;
116 }
117 server->pnfs_curr_ld = ld_type;
118 if (ld_type->set_layoutdriver(server)) {
119 printk(KERN_ERR
120 "%s: Error initializing mount point for layout driver %u.\n",
121 __func__, id);
122 module_put(ld_type->owner);
123 goto out_no_driver;
124 }
125 dprintk("%s: pNFS module for %u set\n", __func__, id);
126 return;
127
128out_no_driver:
129 dprintk("%s: Using NFSv4 I/O\n", __func__);
130 server->pnfs_curr_ld = NULL;
131}
132
133int
134pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
135{
136 int status = -EINVAL;
137 struct pnfs_layoutdriver_type *tmp;
138
139 if (ld_type->id == 0) {
140 printk(KERN_ERR "%s id 0 is reserved\n", __func__);
141 return status;
142 }
143 if (!ld_type->alloc_lseg || !ld_type->free_lseg) {
144 printk(KERN_ERR "%s Layout driver must provide "
145 "alloc_lseg and free_lseg.\n", __func__);
146 return status;
147 }
148
149 spin_lock(&pnfs_spinlock);
150 tmp = find_pnfs_driver_locked(ld_type->id);
151 if (!tmp) {
152 list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl);
153 status = 0;
154 dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id,
155 ld_type->name);
156 } else {
157 printk(KERN_ERR "%s Module with id %d already loaded!\n",
158 __func__, ld_type->id);
159 }
160 spin_unlock(&pnfs_spinlock);
161
162 return status;
163}
164EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver);
165
166void
167pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
168{
169 dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
170 spin_lock(&pnfs_spinlock);
171 list_del(&ld_type->pnfs_tblid);
172 spin_unlock(&pnfs_spinlock);
173}
174EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
175
176/*
177 * pNFS client layout cache
178 */
179
180static void
181get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
182{
183 assert_spin_locked(&lo->inode->i_lock);
184 lo->refcount++;
185}
186
187static void
188put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
189{
190 assert_spin_locked(&lo->inode->i_lock);
191 BUG_ON(lo->refcount == 0);
192
193 lo->refcount--;
194 if (!lo->refcount) {
195 dprintk("%s: freeing layout cache %p\n", __func__, lo);
196 BUG_ON(!list_empty(&lo->layouts));
197 NFS_I(lo->inode)->layout = NULL;
198 kfree(lo);
199 }
200}
201
202void
203put_layout_hdr(struct inode *inode)
204{
205 spin_lock(&inode->i_lock);
206 put_layout_hdr_locked(NFS_I(inode)->layout);
207 spin_unlock(&inode->i_lock);
208}
209
210static void
211init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
212{
213 INIT_LIST_HEAD(&lseg->fi_list);
214 kref_init(&lseg->kref);
215 lseg->layout = lo;
216}
217
218/* Called without i_lock held, as the free_lseg call may sleep */
219static void
220destroy_lseg(struct kref *kref)
221{
222 struct pnfs_layout_segment *lseg =
223 container_of(kref, struct pnfs_layout_segment, kref);
224 struct inode *ino = lseg->layout->inode;
225
226 dprintk("--> %s\n", __func__);
227 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
228 /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
229 put_layout_hdr(ino);
230}
231
232static void
233put_lseg(struct pnfs_layout_segment *lseg)
234{
235 if (!lseg)
236 return;
237
238 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
239 atomic_read(&lseg->kref.refcount));
240 kref_put(&lseg->kref, destroy_lseg);
241}
242
243static void
244pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
245{
246 struct pnfs_layout_segment *lseg, *next;
247 struct nfs_client *clp;
248
249 dprintk("%s:Begin lo %p\n", __func__, lo);
250
251 assert_spin_locked(&lo->inode->i_lock);
252 list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
253 dprintk("%s: freeing lseg %p\n", __func__, lseg);
254 list_move(&lseg->fi_list, tmp_list);
255 }
256 clp = NFS_SERVER(lo->inode)->nfs_client;
257 spin_lock(&clp->cl_lock);
258 /* List does not take a reference, so no need for put here */
259 list_del_init(&lo->layouts);
260 spin_unlock(&clp->cl_lock);
261 write_seqlock(&lo->seqlock);
262 clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
263 write_sequnlock(&lo->seqlock);
264
265 dprintk("%s:Return\n", __func__);
266}
267
268static void
269pnfs_free_lseg_list(struct list_head *tmp_list)
270{
271 struct pnfs_layout_segment *lseg;
272
273 while (!list_empty(tmp_list)) {
274 lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
275 fi_list);
276 dprintk("%s calling put_lseg on %p\n", __func__, lseg);
277 list_del(&lseg->fi_list);
278 put_lseg(lseg);
279 }
280}
281
282void
283pnfs_destroy_layout(struct nfs_inode *nfsi)
284{
285 struct pnfs_layout_hdr *lo;
286 LIST_HEAD(tmp_list);
287
288 spin_lock(&nfsi->vfs_inode.i_lock);
289 lo = nfsi->layout;
290 if (lo) {
291 pnfs_clear_lseg_list(lo, &tmp_list);
292 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
293 put_layout_hdr_locked(lo);
294 }
295 spin_unlock(&nfsi->vfs_inode.i_lock);
296 pnfs_free_lseg_list(&tmp_list);
297}
298
299/*
300 * Called by the state manger to remove all layouts established under an
301 * expired lease.
302 */
303void
304pnfs_destroy_all_layouts(struct nfs_client *clp)
305{
306 struct pnfs_layout_hdr *lo;
307 LIST_HEAD(tmp_list);
308
309 spin_lock(&clp->cl_lock);
310 list_splice_init(&clp->cl_layouts, &tmp_list);
311 spin_unlock(&clp->cl_lock);
312
313 while (!list_empty(&tmp_list)) {
314 lo = list_entry(tmp_list.next, struct pnfs_layout_hdr,
315 layouts);
316 dprintk("%s freeing layout for inode %lu\n", __func__,
317 lo->inode->i_ino);
318 pnfs_destroy_layout(NFS_I(lo->inode));
319 }
320}
321
322/* update lo->stateid with new if is more recent
323 *
324 * lo->stateid could be the open stateid, in which case we just use what given.
325 */
326static void
327pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
328 const nfs4_stateid *new)
329{
330 nfs4_stateid *old = &lo->stateid;
331 bool overwrite = false;
332
333 write_seqlock(&lo->seqlock);
334 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) ||
335 memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
336 overwrite = true;
337 else {
338 u32 oldseq, newseq;
339
340 oldseq = be32_to_cpu(old->stateid.seqid);
341 newseq = be32_to_cpu(new->stateid.seqid);
342 if ((int)(newseq - oldseq) > 0)
343 overwrite = true;
344 }
345 if (overwrite)
346 memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
347 write_sequnlock(&lo->seqlock);
348}
349
350static void
351pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
352 struct nfs4_state *state)
353{
354 int seq;
355
356 dprintk("--> %s\n", __func__);
357 write_seqlock(&lo->seqlock);
358 do {
359 seq = read_seqbegin(&state->seqlock);
360 memcpy(lo->stateid.data, state->stateid.data,
361 sizeof(state->stateid.data));
362 } while (read_seqretry(&state->seqlock, seq));
363 set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
364 write_sequnlock(&lo->seqlock);
365 dprintk("<-- %s\n", __func__);
366}
367
368void
369pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
370 struct nfs4_state *open_state)
371{
372 int seq;
373
374 dprintk("--> %s\n", __func__);
375 do {
376 seq = read_seqbegin(&lo->seqlock);
377 if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
378 /* This will trigger retry of the read */
379 pnfs_layout_from_open_stateid(lo, open_state);
380 } else
381 memcpy(dst->data, lo->stateid.data,
382 sizeof(lo->stateid.data));
383 } while (read_seqretry(&lo->seqlock, seq));
384 dprintk("<-- %s\n", __func__);
385}
386
387/*
388* Get layout from server.
389* for now, assume that whole file layouts are requested.
390* arg->offset: 0
391* arg->length: all ones
392*/
393static struct pnfs_layout_segment *
394send_layoutget(struct pnfs_layout_hdr *lo,
395 struct nfs_open_context *ctx,
396 u32 iomode)
397{
398 struct inode *ino = lo->inode;
399 struct nfs_server *server = NFS_SERVER(ino);
400 struct nfs4_layoutget *lgp;
401 struct pnfs_layout_segment *lseg = NULL;
402
403 dprintk("--> %s\n", __func__);
404
405 BUG_ON(ctx == NULL);
406 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
407 if (lgp == NULL) {
408 put_layout_hdr(lo->inode);
409 return NULL;
410 }
411 lgp->args.minlength = NFS4_MAX_UINT64;
412 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
413 lgp->args.range.iomode = iomode;
414 lgp->args.range.offset = 0;
415 lgp->args.range.length = NFS4_MAX_UINT64;
416 lgp->args.type = server->pnfs_curr_ld->id;
417 lgp->args.inode = ino;
418 lgp->args.ctx = get_nfs_open_context(ctx);
419 lgp->lsegpp = &lseg;
420
421 /* Synchronously retrieve layout information from server and
422 * store in lseg.
423 */
424 nfs4_proc_layoutget(lgp);
425 if (!lseg) {
426 /* remember that LAYOUTGET failed and suspend trying */
427 set_bit(lo_fail_bit(iomode), &lo->state);
428 }
429 return lseg;
430}
431
432/*
433 * Compare two layout segments for sorting into layout cache.
434 * We want to preferentially return RW over RO layouts, so ensure those
435 * are seen first.
436 */
437static s64
438cmp_layout(u32 iomode1, u32 iomode2)
439{
440 /* read > read/write */
441 return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ);
442}
443
444static void
445pnfs_insert_layout(struct pnfs_layout_hdr *lo,
446 struct pnfs_layout_segment *lseg)
447{
448 struct pnfs_layout_segment *lp;
449 int found = 0;
450
451 dprintk("%s:Begin\n", __func__);
452
453 assert_spin_locked(&lo->inode->i_lock);
454 if (list_empty(&lo->segs)) {
455 struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
456
457 spin_lock(&clp->cl_lock);
458 BUG_ON(!list_empty(&lo->layouts));
459 list_add_tail(&lo->layouts, &clp->cl_layouts);
460 spin_unlock(&clp->cl_lock);
461 }
462 list_for_each_entry(lp, &lo->segs, fi_list) {
463 if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0)
464 continue;
465 list_add_tail(&lseg->fi_list, &lp->fi_list);
466 dprintk("%s: inserted lseg %p "
467 "iomode %d offset %llu length %llu before "
468 "lp %p iomode %d offset %llu length %llu\n",
469 __func__, lseg, lseg->range.iomode,
470 lseg->range.offset, lseg->range.length,
471 lp, lp->range.iomode, lp->range.offset,
472 lp->range.length);
473 found = 1;
474 break;
475 }
476 if (!found) {
477 list_add_tail(&lseg->fi_list, &lo->segs);
478 dprintk("%s: inserted lseg %p "
479 "iomode %d offset %llu length %llu at tail\n",
480 __func__, lseg, lseg->range.iomode,
481 lseg->range.offset, lseg->range.length);
482 }
483 get_layout_hdr_locked(lo);
484
485 dprintk("%s:Return\n", __func__);
486}
487
488static struct pnfs_layout_hdr *
489alloc_init_layout_hdr(struct inode *ino)
490{
491 struct pnfs_layout_hdr *lo;
492
493 lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL);
494 if (!lo)
495 return NULL;
496 lo->refcount = 1;
497 INIT_LIST_HEAD(&lo->layouts);
498 INIT_LIST_HEAD(&lo->segs);
499 seqlock_init(&lo->seqlock);
500 lo->inode = ino;
501 return lo;
502}
503
504static struct pnfs_layout_hdr *
505pnfs_find_alloc_layout(struct inode *ino)
506{
507 struct nfs_inode *nfsi = NFS_I(ino);
508 struct pnfs_layout_hdr *new = NULL;
509
510 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
511
512 assert_spin_locked(&ino->i_lock);
513 if (nfsi->layout)
514 return nfsi->layout;
515
516 spin_unlock(&ino->i_lock);
517 new = alloc_init_layout_hdr(ino);
518 spin_lock(&ino->i_lock);
519
520 if (likely(nfsi->layout == NULL)) /* Won the race? */
521 nfsi->layout = new;
522 else
523 kfree(new);
524 return nfsi->layout;
525}
526
527/*
528 * iomode matching rules:
529 * iomode lseg match
530 * ----- ----- -----
531 * ANY READ true
532 * ANY RW true
533 * RW READ false
534 * RW RW true
535 * READ READ true
536 * READ RW true
537 */
538static int
539is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode)
540{
541 return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW);
542}
543
544/*
545 * lookup range in layout
546 */
547static struct pnfs_layout_segment *
548pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode)
549{
550 struct pnfs_layout_segment *lseg, *ret = NULL;
551
552 dprintk("%s:Begin\n", __func__);
553
554 assert_spin_locked(&lo->inode->i_lock);
555 list_for_each_entry(lseg, &lo->segs, fi_list) {
556 if (is_matching_lseg(lseg, iomode)) {
557 ret = lseg;
558 break;
559 }
560 if (cmp_layout(iomode, lseg->range.iomode) > 0)
561 break;
562 }
563
564 dprintk("%s:Return lseg %p ref %d\n",
565 __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0);
566 return ret;
567}
568
569/*
570 * Layout segment is retreived from the server if not cached.
571 * The appropriate layout segment is referenced and returned to the caller.
572 */
573struct pnfs_layout_segment *
574pnfs_update_layout(struct inode *ino,
575 struct nfs_open_context *ctx,
576 enum pnfs_iomode iomode)
577{
578 struct nfs_inode *nfsi = NFS_I(ino);
579 struct pnfs_layout_hdr *lo;
580 struct pnfs_layout_segment *lseg = NULL;
581
582 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
583 return NULL;
584 spin_lock(&ino->i_lock);
585 lo = pnfs_find_alloc_layout(ino);
586 if (lo == NULL) {
587 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
588 goto out_unlock;
589 }
590
591 /* Check to see if the layout for the given range already exists */
592 lseg = pnfs_has_layout(lo, iomode);
593 if (lseg) {
594 dprintk("%s: Using cached lseg %p for iomode %d)\n",
595 __func__, lseg, iomode);
596 goto out_unlock;
597 }
598
599 /* if LAYOUTGET already failed once we don't try again */
600 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state))
601 goto out_unlock;
602
603 get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */
604 spin_unlock(&ino->i_lock);
605
606 lseg = send_layoutget(lo, ctx, iomode);
607out:
608 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
609 nfsi->layout->state, lseg);
610 return lseg;
611out_unlock:
612 spin_unlock(&ino->i_lock);
613 goto out;
614}
615
616int
617pnfs_layout_process(struct nfs4_layoutget *lgp)
618{
619 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
620 struct nfs4_layoutget_res *res = &lgp->res;
621 struct pnfs_layout_segment *lseg;
622 struct inode *ino = lo->inode;
623 int status = 0;
624
625 /* Inject layout blob into I/O device driver */
626 lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res);
627 if (!lseg || IS_ERR(lseg)) {
628 if (!lseg)
629 status = -ENOMEM;
630 else
631 status = PTR_ERR(lseg);
632 dprintk("%s: Could not allocate layout: error %d\n",
633 __func__, status);
634 goto out;
635 }
636
637 spin_lock(&ino->i_lock);
638 init_lseg(lo, lseg);
639 lseg->range = res->range;
640 *lgp->lsegpp = lseg;
641 pnfs_insert_layout(lo, lseg);
642
643 /* Done processing layoutget. Set the layout stateid */
644 pnfs_set_layout_stateid(lo, &res->stateid);
645 spin_unlock(&ino->i_lock);
646out:
647 return status;
648}
649
650/*
651 * Device ID cache. Currently supports one layout type per struct nfs_client.
652 * Add layout type to the lookup key to expand to support multiple types.
653 */
654int
655pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
656 void (*free_callback)(struct pnfs_deviceid_node *))
657{
658 struct pnfs_deviceid_cache *c;
659
660 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
661 if (!c)
662 return -ENOMEM;
663 spin_lock(&clp->cl_lock);
664 if (clp->cl_devid_cache != NULL) {
665 atomic_inc(&clp->cl_devid_cache->dc_ref);
666 dprintk("%s [kref [%d]]\n", __func__,
667 atomic_read(&clp->cl_devid_cache->dc_ref));
668 kfree(c);
669 } else {
670 /* kzalloc initializes hlists */
671 spin_lock_init(&c->dc_lock);
672 atomic_set(&c->dc_ref, 1);
673 c->dc_free_callback = free_callback;
674 clp->cl_devid_cache = c;
675 dprintk("%s [new]\n", __func__);
676 }
677 spin_unlock(&clp->cl_lock);
678 return 0;
679}
680EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
681
682/*
683 * Called from pnfs_layoutdriver_type->free_lseg
684 * last layout segment reference frees deviceid
685 */
686void
687pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
688 struct pnfs_deviceid_node *devid)
689{
690 struct nfs4_deviceid *id = &devid->de_id;
691 struct pnfs_deviceid_node *d;
692 struct hlist_node *n;
693 long h = nfs4_deviceid_hash(id);
694
695 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
696 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
697 return;
698
699 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
700 if (!memcmp(&d->de_id, id, sizeof(*id))) {
701 hlist_del_rcu(&d->de_node);
702 spin_unlock(&c->dc_lock);
703 synchronize_rcu();
704 c->dc_free_callback(devid);
705 return;
706 }
707 spin_unlock(&c->dc_lock);
708 /* Why wasn't it found in the list? */
709 BUG();
710}
711EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
712
713/* Find and reference a deviceid */
714struct pnfs_deviceid_node *
715pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
716{
717 struct pnfs_deviceid_node *d;
718 struct hlist_node *n;
719 long hash = nfs4_deviceid_hash(id);
720
721 dprintk("--> %s hash %ld\n", __func__, hash);
722 rcu_read_lock();
723 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
724 if (!memcmp(&d->de_id, id, sizeof(*id))) {
725 if (!atomic_inc_not_zero(&d->de_ref)) {
726 goto fail;
727 } else {
728 rcu_read_unlock();
729 return d;
730 }
731 }
732 }
733fail:
734 rcu_read_unlock();
735 return NULL;
736}
737EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
738
739/*
740 * Add a deviceid to the cache.
741 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
742 */
743struct pnfs_deviceid_node *
744pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
745{
746 struct pnfs_deviceid_node *d;
747 long hash = nfs4_deviceid_hash(&new->de_id);
748
749 dprintk("--> %s hash %ld\n", __func__, hash);
750 spin_lock(&c->dc_lock);
751 d = pnfs_find_get_deviceid(c, &new->de_id);
752 if (d) {
753 spin_unlock(&c->dc_lock);
754 dprintk("%s [discard]\n", __func__);
755 c->dc_free_callback(new);
756 return d;
757 }
758 INIT_HLIST_NODE(&new->de_node);
759 atomic_set(&new->de_ref, 1);
760 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
761 spin_unlock(&c->dc_lock);
762 dprintk("%s [new]\n", __func__);
763 return new;
764}
765EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
766
767void
768pnfs_put_deviceid_cache(struct nfs_client *clp)
769{
770 struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
771
772 dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache);
773 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
774 int i;
775 /* Verify cache is empty */
776 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
777 BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
778 clp->cl_devid_cache = NULL;
779 spin_unlock(&clp->cl_lock);
780 kfree(local);
781 }
782}
783EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
new file mode 100644
index 000000000000..e12367d50489
--- /dev/null
+++ b/fs/nfs/pnfs.h
@@ -0,0 +1,189 @@
1/*
2 * pNFS client data structures.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 *
10 * Permission is granted to use, copy, create derivative works, and
11 * redistribute this software and such derivative works for any purpose,
12 * so long as the name of the University of Michigan is not used in
13 * any advertising or publicity pertaining to the use or distribution
14 * of this software without specific, written prior authorization. If
15 * the above copyright notice or any other identification of the
16 * University of Michigan is included in any copy of any portion of
17 * this software, then the disclaimer below must also be included.
18 *
19 * This software is provided as is, without representation or warranty
20 * of any kind either express or implied, including without limitation
21 * the implied warranties of merchantability, fitness for a particular
22 * purpose, or noninfringement. The Regents of the University of
23 * Michigan shall not be liable for any damages, including special,
24 * indirect, incidental, or consequential damages, with respect to any
25 * claim arising out of or in connection with the use of the software,
26 * even if it has been or is hereafter advised of the possibility of
27 * such damages.
28 */
29
30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H
32
/*
 * One cached layout segment.  Declared outside the CONFIG_NFS_V4_1 section,
 * so the type is available to the stub pnfs_update_layout() as well.
 */
33struct pnfs_layout_segment {
/* entry in pnfs_layout_hdr.segs */
34 struct list_head fi_list;
/* iomode/offset/length; copied from the LAYOUTGET result */
35 struct pnfs_layout_range range;
/* reference count */
36 struct kref kref;
/* presumably the owning layout header -- TODO confirm where it is set */
37 struct pnfs_layout_hdr *layout;
38};
39
40#ifdef CONFIG_NFS_V4_1
41
42#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
43
/*
 * Bit numbers for pnfs_layout_hdr.state, used with set_bit()/test_bit().
 * lo_fail_bit() selects between the two *_FAILED bits by iomode.
 */
44enum {
45 NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
46 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
47 NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
48};
49
50/* Per-layout driver specific registration structure */
51struct pnfs_layoutdriver_type {
/* presumably links registered drivers together -- TODO confirm */
52 struct list_head pnfs_tblid;
/* layout type; sent as the LAYOUTGET 'type' argument */
53 const u32 id;
54 const char *name;
55 struct module *owner;
/* NOTE(review): callers of the next two hooks are not visible here */
56 int (*set_layoutdriver) (struct nfs_server *);
57 int (*clear_layoutdriver) (struct nfs_server *);
/*
 * Build a driver-specific segment from a LAYOUTGET result.  The caller
 * treats both NULL and an ERR_PTR() value as failure.
 */
58 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
/* counterpart of alloc_lseg */
59 void (*free_lseg) (struct pnfs_layout_segment *lseg);
60};
61
/*
 * Per-inode layout header: the list of cached layout segments for one inode
 * together with the layout stateid and status bits.
 */
62struct pnfs_layout_hdr {
/* starts at 1 when the header is allocated */
63 unsigned long refcount;
64 struct list_head layouts; /* other client layouts */
65 struct list_head segs; /* layout segments list */
66 seqlock_t seqlock; /* Protects the stateid */
67 nfs4_stateid stateid;
/* NFS_LAYOUT_* bits */
68 unsigned long state;
/* inode this header belongs to */
69 struct inode *inode;
70};
71
/*
 * Argument block for nfs4_proc_getdeviceinfo().
 * NOTE(review): none of the fields are used in the visible code; the field
 * meanings below the device ID are not documented here for that reason.
 */
72struct pnfs_device {
73 struct nfs4_deviceid dev_id;
74 unsigned int layout_type;
75 unsigned int mincount;
76 struct page **pages;
77 void *area;
78 unsigned int pgbase;
79 unsigned int pglen;
80};
81
82/*
83 * Device ID RCU cache. A device ID is unique per client ID and layout type.
84 */
85#define NFS4_DEVICE_ID_HASH_BITS 5
86#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
87#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
88
89static inline u32
90nfs4_deviceid_hash(struct nfs4_deviceid *id)
91{
92 unsigned char *cptr = (unsigned char *)id->data;
93 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
94 u32 x = 0;
95
96 while (nbytes--) {
97 x *= 37;
98 x += *cptr++;
99 }
100 return x & NFS4_DEVICE_ID_HASH_MASK;
101}
102
/* One entry in the device ID cache. */
103struct pnfs_deviceid_node {
/* link in pnfs_deviceid_cache.dc_deviceids[hash]; walked under RCU */
104 struct hlist_node de_node;
/* lookup key */
105 struct nfs4_deviceid de_id;
/* reference count; the node is unhashed and freed when it drops to zero */
106 atomic_t de_ref;
107};
108
/* Device ID cache hanging off struct nfs_client (cl_devid_cache). */
109struct pnfs_deviceid_cache {
/* taken around additions to and removals from the hash lists */
110 spinlock_t dc_lock;
/* number of users of the cache; the cache is freed on the last put */
111 atomic_t dc_ref;
/* called to dispose of a node that is discarded or no longer referenced */
112 void (*dc_free_callback)(struct pnfs_deviceid_node *);
/* buckets indexed by nfs4_deviceid_hash() */
113 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
114};
115
116extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
117 void (*free_callback)(struct pnfs_deviceid_node *));
118extern void pnfs_put_deviceid_cache(struct nfs_client *);
119extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
120 struct pnfs_deviceid_cache *,
121 struct nfs4_deviceid *);
122extern struct pnfs_deviceid_node *pnfs_add_deviceid(
123 struct pnfs_deviceid_cache *,
124 struct pnfs_deviceid_node *);
125extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
126 struct pnfs_deviceid_node *devid);
127
128extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
129extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
130
131/* nfs4proc.c */
132extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
133 struct pnfs_device *dev);
134extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
135
136/* pnfs.c */
137struct pnfs_layout_segment *
138pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
139 enum pnfs_iomode access_type);
140void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
141void unset_pnfs_layoutdriver(struct nfs_server *);
142int pnfs_layout_process(struct nfs4_layoutget *lgp);
143void pnfs_destroy_layout(struct nfs_inode *);
144void pnfs_destroy_all_layouts(struct nfs_client *);
145void put_layout_hdr(struct inode *inode);
146void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
147 struct nfs4_state *open_state);
148
149
150static inline int lo_fail_bit(u32 iomode)
151{
152 return iomode == IOMODE_RW ?
153 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
154}
155
156/* Return true if a layout driver is being used for this mountpoint */
157static inline int pnfs_enabled_sb(struct nfs_server *nfss)
158{
159 return nfss->pnfs_curr_ld != NULL;
160}
161
162#else /* CONFIG_NFS_V4_1 */
163
/*
 * Stubs used when CONFIG_NFS_V4_1 is not set: they do nothing, and
 * pnfs_update_layout() never returns a layout segment.
 */
164static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
165{
166}
167
168static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
169{
170}
171
172static inline struct pnfs_layout_segment *
173pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
174 enum pnfs_iomode access_type)
175{
/* no pNFS: callers always see "no layout" */
176 return NULL;
177}
178
179static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id)
180{
181}
182
183static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
184{
185}
186
187#endif /* CONFIG_NFS_V4_1 */
188
189#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 611bec22f552..58e7f84fc1fd 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -258,7 +258,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data)
258 258
259static int 259static int
260nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 260nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
261 int flags, struct nameidata *nd) 261 int flags, struct nfs_open_context *ctx)
262{ 262{
263 struct nfs_createdata *data; 263 struct nfs_createdata *data;
264 struct rpc_message msg = { 264 struct rpc_message msg = {
@@ -365,17 +365,32 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir)
365 return 1; 365 return 1;
366} 366}
367 367
/*
 * Point @msg at the NFSPROC_RENAME procedure entry.  @dir is not used.
 */
368static void
369nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir)
370{
371 msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME];
372}
373
/*
 * Completion hook for a RENAME call.
 *
 * Returns 0 when nfs_async_handle_expired_key() reports that it dealt with
 * the task; otherwise marks both directories for revalidation and returns 1.
 */
static int
nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
		     struct inode *new_dir)
{
	int done = !nfs_async_handle_expired_key(task);

	if (done) {
		nfs_mark_for_revalidate(old_dir);
		nfs_mark_for_revalidate(new_dir);
	}
	return done;
}
384
368static int 385static int
369nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, 386nfs_proc_rename(struct inode *old_dir, struct qstr *old_name,
370 struct inode *new_dir, struct qstr *new_name) 387 struct inode *new_dir, struct qstr *new_name)
371{ 388{
372 struct nfs_renameargs arg = { 389 struct nfs_renameargs arg = {
373 .fromfh = NFS_FH(old_dir), 390 .old_dir = NFS_FH(old_dir),
374 .fromname = old_name->name, 391 .old_name = old_name,
375 .fromlen = old_name->len, 392 .new_dir = NFS_FH(new_dir),
376 .tofh = NFS_FH(new_dir), 393 .new_name = new_name,
377 .toname = new_name->name,
378 .tolen = new_name->len
379 }; 394 };
380 struct rpc_message msg = { 395 struct rpc_message msg = {
381 .rpc_proc = &nfs_procedures[NFSPROC_RENAME], 396 .rpc_proc = &nfs_procedures[NFSPROC_RENAME],
@@ -519,14 +534,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name)
519 */ 534 */
520static int 535static int
521nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, 536nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
522 u64 cookie, struct page *page, unsigned int count, int plus) 537 u64 cookie, struct page **pages, unsigned int count, int plus)
523{ 538{
524 struct inode *dir = dentry->d_inode; 539 struct inode *dir = dentry->d_inode;
525 struct nfs_readdirargs arg = { 540 struct nfs_readdirargs arg = {
526 .fh = NFS_FH(dir), 541 .fh = NFS_FH(dir),
527 .cookie = cookie, 542 .cookie = cookie,
528 .count = count, 543 .count = count,
529 .pages = &page, 544 .pages = pages,
530 }; 545 };
531 struct rpc_message msg = { 546 struct rpc_message msg = {
532 .rpc_proc = &nfs_procedures[NFSPROC_READDIR], 547 .rpc_proc = &nfs_procedures[NFSPROC_READDIR],
@@ -705,6 +720,8 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
705 .unlink_setup = nfs_proc_unlink_setup, 720 .unlink_setup = nfs_proc_unlink_setup,
706 .unlink_done = nfs_proc_unlink_done, 721 .unlink_done = nfs_proc_unlink_done,
707 .rename = nfs_proc_rename, 722 .rename = nfs_proc_rename,
723 .rename_setup = nfs_proc_rename_setup,
724 .rename_done = nfs_proc_rename_done,
708 .link = nfs_proc_link, 725 .link = nfs_proc_link,
709 .symlink = nfs_proc_symlink, 726 .symlink = nfs_proc_symlink,
710 .mkdir = nfs_proc_mkdir, 727 .mkdir = nfs_proc_mkdir,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 87adc2744246..e4b62c6f5a6e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -25,6 +25,7 @@
25#include "internal.h" 25#include "internal.h"
26#include "iostat.h" 26#include "iostat.h"
27#include "fscache.h" 27#include "fscache.h"
28#include "pnfs.h"
28 29
29#define NFSDBG_FACILITY NFSDBG_PAGECACHE 30#define NFSDBG_FACILITY NFSDBG_PAGECACHE
30 31
@@ -46,7 +47,6 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
46 memset(p, 0, sizeof(*p)); 47 memset(p, 0, sizeof(*p));
47 INIT_LIST_HEAD(&p->pages); 48 INIT_LIST_HEAD(&p->pages);
48 p->npages = pagecount; 49 p->npages = pagecount;
49 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
50 if (pagecount <= ARRAY_SIZE(p->page_array)) 50 if (pagecount <= ARRAY_SIZE(p->page_array))
51 p->pagevec = p->page_array; 51 p->pagevec = p->page_array;
52 else { 52 else {
@@ -121,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
121 len = nfs_page_length(page); 121 len = nfs_page_length(page);
122 if (len == 0) 122 if (len == 0)
123 return nfs_return_empty_page(page); 123 return nfs_return_empty_page(page);
124 pnfs_update_layout(inode, ctx, IOMODE_READ);
124 new = nfs_create_request(ctx, inode, page, 0, len); 125 new = nfs_create_request(ctx, inode, page, 0, len);
125 if (IS_ERR(new)) { 126 if (IS_ERR(new)) {
126 unlock_page(page); 127 unlock_page(page);
@@ -625,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
625 if (ret == 0) 626 if (ret == 0)
626 goto read_complete; /* all pages were read */ 627 goto read_complete; /* all pages were read */
627 628
629 pnfs_update_layout(inode, desc.ctx, IOMODE_READ);
628 if (rsize < PAGE_CACHE_SIZE) 630 if (rsize < PAGE_CACHE_SIZE)
629 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 631 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
630 else 632 else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f4cbf0c306c6..3600ec700d58 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -100,6 +100,7 @@ enum {
100 Opt_addr, Opt_mountaddr, Opt_clientaddr, 100 Opt_addr, Opt_mountaddr, Opt_clientaddr,
101 Opt_lookupcache, 101 Opt_lookupcache,
102 Opt_fscache_uniq, 102 Opt_fscache_uniq,
103 Opt_local_lock,
103 104
104 /* Special mount options */ 105 /* Special mount options */
105 Opt_userspace, Opt_deprecated, Opt_sloppy, 106 Opt_userspace, Opt_deprecated, Opt_sloppy,
@@ -171,6 +172,7 @@ static const match_table_t nfs_mount_option_tokens = {
171 172
172 { Opt_lookupcache, "lookupcache=%s" }, 173 { Opt_lookupcache, "lookupcache=%s" },
173 { Opt_fscache_uniq, "fsc=%s" }, 174 { Opt_fscache_uniq, "fsc=%s" },
175 { Opt_local_lock, "local_lock=%s" },
174 176
175 { Opt_err, NULL } 177 { Opt_err, NULL }
176}; 178};
@@ -236,6 +238,22 @@ static match_table_t nfs_lookupcache_tokens = {
236 { Opt_lookupcache_err, NULL } 238 { Opt_lookupcache_err, NULL }
237}; 239};
238 240
/* Tokens for the value of the "local_lock=" mount option */
241enum {
242 Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix,
243 Opt_local_lock_none,
244
/* returned for a value that matches none of the above */
245 Opt_local_lock_err
246};
247
/*
 * Match table for the "local_lock=" value: "all" selects both the flock and
 * fcntl local-lock flags, "flock" and "posix" select one each, and "none"
 * clears both.  The NULL pattern terminates the table.
 */
248static match_table_t nfs_local_lock_tokens = {
249 { Opt_local_lock_all, "all" },
250 { Opt_local_lock_flock, "flock" },
251 { Opt_local_lock_posix, "posix" },
252 { Opt_local_lock_none, "none" },
253
254 { Opt_local_lock_err, NULL }
255};
256
239 257
240static void nfs_umount_begin(struct super_block *); 258static void nfs_umount_begin(struct super_block *);
241static int nfs_statfs(struct dentry *, struct kstatfs *); 259static int nfs_statfs(struct dentry *, struct kstatfs *);
@@ -622,6 +640,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
622 const struct proc_nfs_info *nfs_infop; 640 const struct proc_nfs_info *nfs_infop;
623 struct nfs_client *clp = nfss->nfs_client; 641 struct nfs_client *clp = nfss->nfs_client;
624 u32 version = clp->rpc_ops->version; 642 u32 version = clp->rpc_ops->version;
643 int local_flock, local_fcntl;
625 644
626 seq_printf(m, ",vers=%u", version); 645 seq_printf(m, ",vers=%u", version);
627 seq_printf(m, ",rsize=%u", nfss->rsize); 646 seq_printf(m, ",rsize=%u", nfss->rsize);
@@ -670,6 +689,18 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
670 else 689 else
671 seq_printf(m, ",lookupcache=pos"); 690 seq_printf(m, ",lookupcache=pos");
672 } 691 }
692
693 local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK;
694 local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL;
695
696 if (!local_flock && !local_fcntl)
697 seq_printf(m, ",local_lock=none");
698 else if (local_flock && local_fcntl)
699 seq_printf(m, ",local_lock=all");
700 else if (local_flock)
701 seq_printf(m, ",local_lock=flock");
702 else
703 seq_printf(m, ",local_lock=posix");
673} 704}
674 705
675/* 706/*
@@ -1017,9 +1048,13 @@ static int nfs_parse_mount_options(char *raw,
1017 break; 1048 break;
1018 case Opt_lock: 1049 case Opt_lock:
1019 mnt->flags &= ~NFS_MOUNT_NONLM; 1050 mnt->flags &= ~NFS_MOUNT_NONLM;
1051 mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
1052 NFS_MOUNT_LOCAL_FCNTL);
1020 break; 1053 break;
1021 case Opt_nolock: 1054 case Opt_nolock:
1022 mnt->flags |= NFS_MOUNT_NONLM; 1055 mnt->flags |= NFS_MOUNT_NONLM;
1056 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
1057 NFS_MOUNT_LOCAL_FCNTL);
1023 break; 1058 break;
1024 case Opt_v2: 1059 case Opt_v2:
1025 mnt->flags &= ~NFS_MOUNT_VER3; 1060 mnt->flags &= ~NFS_MOUNT_VER3;
@@ -1420,6 +1455,34 @@ static int nfs_parse_mount_options(char *raw,
1420 mnt->fscache_uniq = string; 1455 mnt->fscache_uniq = string;
1421 mnt->options |= NFS_OPTION_FSCACHE; 1456 mnt->options |= NFS_OPTION_FSCACHE;
1422 break; 1457 break;
1458 case Opt_local_lock:
1459 string = match_strdup(args);
1460 if (string == NULL)
1461 goto out_nomem;
1462 token = match_token(string, nfs_local_lock_tokens,
1463 args);
1464 kfree(string);
1465 switch (token) {
1466 case Opt_local_lock_all:
1467 mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK |
1468 NFS_MOUNT_LOCAL_FCNTL);
1469 break;
1470 case Opt_local_lock_flock:
1471 mnt->flags |= NFS_MOUNT_LOCAL_FLOCK;
1472 break;
1473 case Opt_local_lock_posix:
1474 mnt->flags |= NFS_MOUNT_LOCAL_FCNTL;
1475 break;
1476 case Opt_local_lock_none:
1477 mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK |
1478 NFS_MOUNT_LOCAL_FCNTL);
1479 break;
1480 default:
1481 dfprintk(MOUNT, "NFS: invalid "
1482 "local_lock argument\n");
1483 return 0;
1484 };
1485 break;
1423 1486
1424 /* 1487 /*
1425 * Special options 1488 * Special options
@@ -1825,6 +1888,12 @@ static int nfs_validate_mount_data(void *options,
1825 if (!args->nfs_server.hostname) 1888 if (!args->nfs_server.hostname)
1826 goto out_nomem; 1889 goto out_nomem;
1827 1890
1891 if (!(data->flags & NFS_MOUNT_NONLM))
1892 args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK|
1893 NFS_MOUNT_LOCAL_FCNTL);
1894 else
1895 args->flags |= (NFS_MOUNT_LOCAL_FLOCK|
1896 NFS_MOUNT_LOCAL_FCNTL);
1828 /* 1897 /*
1829 * The legacy version 6 binary mount data from userspace has a 1898 * The legacy version 6 binary mount data from userspace has a
1830 * field used only to transport selinux information into the 1899 * field used only to transport selinux information into the
@@ -2441,7 +2510,8 @@ static void nfs4_fill_super(struct super_block *sb)
2441 2510
2442static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) 2511static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
2443{ 2512{
2444 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); 2513 args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3|
2514 NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL);
2445} 2515}
2446 2516
2447static int nfs4_validate_text_mount_data(void *options, 2517static int nfs4_validate_text_mount_data(void *options,
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index ad4d2e787b20..978aaeb8a093 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -32,6 +32,7 @@ static ctl_table nfs_cb_sysctls[] = {
32 .extra1 = (int *)&nfs_set_port_min, 32 .extra1 = (int *)&nfs_set_port_min,
33 .extra2 = (int *)&nfs_set_port_max, 33 .extra2 = (int *)&nfs_set_port_max,
34 }, 34 },
35#ifndef CONFIG_NFS_USE_NEW_IDMAPPER
35 { 36 {
36 .procname = "idmap_cache_timeout", 37 .procname = "idmap_cache_timeout",
37 .data = &nfs_idmap_cache_timeout, 38 .data = &nfs_idmap_cache_timeout,
@@ -39,6 +40,7 @@ static ctl_table nfs_cb_sysctls[] = {
39 .mode = 0644, 40 .mode = 0644,
40 .proc_handler = proc_dointvec_jiffies, 41 .proc_handler = proc_dointvec_jiffies,
41 }, 42 },
43#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
42#endif 44#endif
43 { 45 {
44 .procname = "nfs_mountpoint_timeout", 46 .procname = "nfs_mountpoint_timeout",
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 2f84adaad427..9a16bad5d2ea 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -13,9 +13,12 @@
13#include <linux/nfs_fs.h> 13#include <linux/nfs_fs.h>
14#include <linux/sched.h> 14#include <linux/sched.h>
15#include <linux/wait.h> 15#include <linux/wait.h>
16#include <linux/namei.h>
16 17
17#include "internal.h" 18#include "internal.h"
18#include "nfs4_fs.h" 19#include "nfs4_fs.h"
20#include "iostat.h"
21#include "delegation.h"
19 22
20struct nfs_unlinkdata { 23struct nfs_unlinkdata {
21 struct hlist_node list; 24 struct hlist_node list;
@@ -244,7 +247,7 @@ void nfs_unblock_sillyrename(struct dentry *dentry)
244 * @dir: parent directory of dentry 247 * @dir: parent directory of dentry
245 * @dentry: dentry to unlink 248 * @dentry: dentry to unlink
246 */ 249 */
247int 250static int
248nfs_async_unlink(struct inode *dir, struct dentry *dentry) 251nfs_async_unlink(struct inode *dir, struct dentry *dentry)
249{ 252{
250 struct nfs_unlinkdata *data; 253 struct nfs_unlinkdata *data;
@@ -259,7 +262,6 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
259 status = PTR_ERR(data->cred); 262 status = PTR_ERR(data->cred);
260 goto out_free; 263 goto out_free;
261 } 264 }
262 data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
263 data->res.dir_attr = &data->dir_attr; 265 data->res.dir_attr = &data->dir_attr;
264 266
265 status = -EBUSY; 267 status = -EBUSY;
@@ -303,3 +305,256 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
303 if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) 305 if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)))
304 nfs_free_unlinkdata(data); 306 nfs_free_unlinkdata(data);
305} 307}
308
309/* Cancel a queued async unlink. Called when a sillyrename run fails. */
310static void
311nfs_cancel_async_unlink(struct dentry *dentry)
312{
313 spin_lock(&dentry->d_lock);
314 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
315 struct nfs_unlinkdata *data = dentry->d_fsdata;
316
317 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
318 spin_unlock(&dentry->d_lock);
319 nfs_free_unlinkdata(data);
320 return;
321 }
322 spin_unlock(&dentry->d_lock);
323}
324
325struct nfs_renamedata {
326 struct nfs_renameargs args;
327 struct nfs_renameres res;
328 struct rpc_cred *cred;
329 struct inode *old_dir;
330 struct dentry *old_dentry;
331 struct nfs_fattr old_fattr;
332 struct inode *new_dir;
333 struct dentry *new_dentry;
334 struct nfs_fattr new_fattr;
335};
336
337/**
338 * nfs_async_rename_done - Sillyrename post-processing
339 * @task: rpc_task of the sillyrename
340 * @calldata: nfs_renamedata for the sillyrename
341 *
342 * Do the directory attribute updates and the d_move
343 */
344static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
345{
346 struct nfs_renamedata *data = calldata;
347 struct inode *old_dir = data->old_dir;
348 struct inode *new_dir = data->new_dir;
349
350 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
351 nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
352 return;
353 }
354
355 if (task->tk_status != 0) {
356 nfs_cancel_async_unlink(data->old_dentry);
357 return;
358 }
359
360 nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir));
361 d_move(data->old_dentry, data->new_dentry);
362}
363
364/**
365 * nfs_async_rename_release - Release the sillyrename data.
366 * @calldata: the struct nfs_renamedata to be released
367 */
368static void nfs_async_rename_release(void *calldata)
369{
370 struct nfs_renamedata *data = calldata;
371 struct super_block *sb = data->old_dir->i_sb;
372
373 if (data->old_dentry->d_inode)
374 nfs_mark_for_revalidate(data->old_dentry->d_inode);
375
376 dput(data->old_dentry);
377 dput(data->new_dentry);
378 iput(data->old_dir);
379 iput(data->new_dir);
380 nfs_sb_deactive(sb);
381 put_rpccred(data->cred);
382 kfree(data);
383}
384
385#if defined(CONFIG_NFS_V4_1)
386static void nfs_rename_prepare(struct rpc_task *task, void *calldata)
387{
388 struct nfs_renamedata *data = calldata;
389 struct nfs_server *server = NFS_SERVER(data->old_dir);
390
391 if (nfs4_setup_sequence(server, &data->args.seq_args,
392 &data->res.seq_res, 1, task))
393 return;
394 rpc_call_start(task);
395}
396#endif /* CONFIG_NFS_V4_1 */
397
398static const struct rpc_call_ops nfs_rename_ops = {
399 .rpc_call_done = nfs_async_rename_done,
400 .rpc_release = nfs_async_rename_release,
401#if defined(CONFIG_NFS_V4_1)
402 .rpc_call_prepare = nfs_rename_prepare,
403#endif /* CONFIG_NFS_V4_1 */
404};
405
406/**
407 * nfs_async_rename - perform an asynchronous rename operation
408 * @old_dir: directory that currently holds the dentry to be renamed
409 * @new_dir: target directory for the rename
410 * @old_dentry: original dentry to be renamed
411 * @new_dentry: dentry to which the old_dentry should be renamed
412 *
413 * It's expected that valid references to the dentries and inodes are held
414 */
415static struct rpc_task *
416nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
417 struct dentry *old_dentry, struct dentry *new_dentry)
418{
419 struct nfs_renamedata *data;
420 struct rpc_message msg = { };
421 struct rpc_task_setup task_setup_data = {
422 .rpc_message = &msg,
423 .callback_ops = &nfs_rename_ops,
424 .workqueue = nfsiod_workqueue,
425 .rpc_client = NFS_CLIENT(old_dir),
426 .flags = RPC_TASK_ASYNC,
427 };
428
429 data = kzalloc(sizeof(*data), GFP_KERNEL);
430 if (data == NULL)
431 return ERR_PTR(-ENOMEM);
432 task_setup_data.callback_data = data,
433
434 data->cred = rpc_lookup_cred();
435 if (IS_ERR(data->cred)) {
436 struct rpc_task *task = ERR_CAST(data->cred);
437 kfree(data);
438 return task;
439 }
440
441 msg.rpc_argp = &data->args;
442 msg.rpc_resp = &data->res;
443 msg.rpc_cred = data->cred;
444
445 /* set up nfs_renamedata */
446 data->old_dir = old_dir;
447 atomic_inc(&old_dir->i_count);
448 data->new_dir = new_dir;
449 atomic_inc(&new_dir->i_count);
450 data->old_dentry = dget(old_dentry);
451 data->new_dentry = dget(new_dentry);
452 nfs_fattr_init(&data->old_fattr);
453 nfs_fattr_init(&data->new_fattr);
454
455 /* set up nfs_renameargs */
456 data->args.old_dir = NFS_FH(old_dir);
457 data->args.old_name = &old_dentry->d_name;
458 data->args.new_dir = NFS_FH(new_dir);
459 data->args.new_name = &new_dentry->d_name;
460
461 /* set up nfs_renameres */
462 data->res.old_fattr = &data->old_fattr;
463 data->res.new_fattr = &data->new_fattr;
464
465 nfs_sb_active(old_dir->i_sb);
466
467 NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir);
468
469 return rpc_run_task(&task_setup_data);
470}
471
472/**
473 * nfs_sillyrename - Perform a silly-rename of a dentry
474 * @dir: inode of directory that contains dentry
475 * @dentry: dentry to be sillyrenamed
476 *
477 * NFSv2/3 is stateless and the server doesn't know when the client is
478 * holding a file open. To prevent application problems when a file is
479 * unlinked while it's still open, the client performs a "silly-rename".
480 * That is, it renames the file to a hidden file in the same directory,
481 * and only performs the unlink once the last reference to it is put.
482 *
483 * The final cleanup is done during dentry_iput.
484 */
485int
486nfs_sillyrename(struct inode *dir, struct dentry *dentry)
487{
488 static unsigned int sillycounter;
489 const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2;
490 const int countersize = sizeof(sillycounter)*2;
491 const int slen = sizeof(".nfs")+fileidsize+countersize-1;
492 char silly[slen+1];
493 struct dentry *sdentry;
494 struct rpc_task *task;
495 int error = -EIO;
496
497 dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n",
498 dentry->d_parent->d_name.name, dentry->d_name.name,
499 atomic_read(&dentry->d_count));
500 nfs_inc_stats(dir, NFSIOS_SILLYRENAME);
501
502 /*
503 * We don't allow a dentry to be silly-renamed twice.
504 */
505 error = -EBUSY;
506 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
507 goto out;
508
509 sprintf(silly, ".nfs%*.*Lx",
510 fileidsize, fileidsize,
511 (unsigned long long)NFS_FILEID(dentry->d_inode));
512
513 /* Return delegation in anticipation of the rename */
514 nfs_inode_return_delegation(dentry->d_inode);
515
516 sdentry = NULL;
517 do {
518 char *suffix = silly + slen - countersize;
519
520 dput(sdentry);
521 sillycounter++;
522 sprintf(suffix, "%*.*x", countersize, countersize, sillycounter);
523
524 dfprintk(VFS, "NFS: trying to rename %s to %s\n",
525 dentry->d_name.name, silly);
526
527 sdentry = lookup_one_len(silly, dentry->d_parent, slen);
528 /*
529 * N.B. Better to return EBUSY here ... it could be
530 * dangerous to delete the file while it's in use.
531 */
532 if (IS_ERR(sdentry))
533 goto out;
534 } while (sdentry->d_inode != NULL); /* need negative lookup */
535
536 /* queue unlink first. Can't do this from rpc_release as it
537 * has to allocate memory
538 */
539 error = nfs_async_unlink(dir, dentry);
540 if (error)
541 goto out_dput;
542
543 /* run the rename task, undo unlink if it fails */
544 task = nfs_async_rename(dir, dir, dentry, sdentry);
545 if (IS_ERR(task)) {
546 error = -EBUSY;
547 nfs_cancel_async_unlink(dentry);
548 goto out_dput;
549 }
550
551 /* wait for the RPC task to complete, unless a SIGKILL intervenes */
552 error = rpc_wait_for_completion_task(task);
553 if (error == 0)
554 error = task->tk_status;
555 rpc_put_task(task);
556out_dput:
557 dput(sdentry);
558out:
559 return error;
560}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 874972d9427c..4c14c17a5276 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -55,7 +55,6 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
55 if (p) { 55 if (p) {
56 memset(p, 0, sizeof(*p)); 56 memset(p, 0, sizeof(*p));
57 INIT_LIST_HEAD(&p->pages); 57 INIT_LIST_HEAD(&p->pages);
58 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
59 } 58 }
60 return p; 59 return p;
61} 60}
@@ -75,7 +74,6 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
75 memset(p, 0, sizeof(*p)); 74 memset(p, 0, sizeof(*p));
76 INIT_LIST_HEAD(&p->pages); 75 INIT_LIST_HEAD(&p->pages);
77 p->npages = pagecount; 76 p->npages = pagecount;
78 p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
79 if (pagecount <= ARRAY_SIZE(p->page_array)) 77 if (pagecount <= ARRAY_SIZE(p->page_array))
80 p->pagevec = p->page_array; 78 p->pagevec = p->page_array;
81 else { 79 else {
@@ -292,9 +290,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
292 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); 290 nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
293 291
294 nfs_pageio_cond_complete(pgio, page->index); 292 nfs_pageio_cond_complete(pgio, page->index);
295 ret = nfs_page_async_flush(pgio, page, 293 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
296 wbc->sync_mode == WB_SYNC_NONE ||
297 wbc->nonblocking != 0);
298 if (ret == -EAGAIN) { 294 if (ret == -EAGAIN) {
299 redirty_page_for_writepage(wbc, page); 295 redirty_page_for_writepage(wbc, page);
300 ret = 0; 296 ret = 0;
@@ -1433,15 +1429,17 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1433 int flags = FLUSH_SYNC; 1429 int flags = FLUSH_SYNC;
1434 int ret = 0; 1430 int ret = 0;
1435 1431
1436 /* Don't commit yet if this is a non-blocking flush and there are 1432 if (wbc->sync_mode == WB_SYNC_NONE) {
1437 * lots of outstanding writes for this mapping. 1433 /* Don't commit yet if this is a non-blocking flush and there
1438 */ 1434 * are a lot of outstanding writes for this mapping.
1439 if (wbc->sync_mode == WB_SYNC_NONE && 1435 */
1440 nfsi->ncommit <= (nfsi->npages >> 1)) 1436 if (nfsi->ncommit <= (nfsi->npages >> 1))
1441 goto out_mark_dirty; 1437 goto out_mark_dirty;
1442 1438
1443 if (wbc->nonblocking || wbc->for_background) 1439 /* don't wait for the COMMIT response */
1444 flags = 0; 1440 flags = 0;
1441 }
1442
1445 ret = nfs_commit_inode(inode, flags); 1443 ret = nfs_commit_inode(inode, flags);
1446 if (ret >= 0) { 1444 if (ret >= 0) {
1447 if (wbc->sync_mode == WB_SYNC_NONE) { 1445 if (wbc->sync_mode == WB_SYNC_NONE) {
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 7cf4ddafb4ab..31a78fce4732 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -29,6 +29,18 @@ config NFSD
29 29
30 If unsure, say N. 30 If unsure, say N.
31 31
32config NFSD_DEPRECATED
33 bool "Include support for deprecated syscall interface to NFSD"
34 depends on NFSD
35 default y
36 help
37 The syscall interface to nfsd was obsoleted in 2.6.0 by a new
38 filesystem based interface. The old interface is due for removal
39 in 2.6.40. If you wish to remove the interface before then
40 say N.
41
42 If unsure, say Y.
43
32config NFSD_V2_ACL 44config NFSD_V2_ACL
33 bool 45 bool
34 depends on NFSD 46 depends on NFSD
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c2a4f71d87dd..c0fcb7ab7f6d 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -28,9 +28,6 @@
28typedef struct auth_domain svc_client; 28typedef struct auth_domain svc_client;
29typedef struct svc_export svc_export; 29typedef struct svc_export svc_export;
30 30
31static void exp_do_unexport(svc_export *unexp);
32static int exp_verify_string(char *cp, int max);
33
34/* 31/*
35 * We have two caches. 32 * We have two caches.
36 * One maps client+vfsmnt+dentry to export options - the export map 33 * One maps client+vfsmnt+dentry to export options - the export map
@@ -802,6 +799,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
802 return ek; 799 return ek;
803} 800}
804 801
802#ifdef CONFIG_NFSD_DEPRECATED
805static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, 803static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
806 struct svc_export *exp) 804 struct svc_export *exp)
807{ 805{
@@ -852,6 +850,7 @@ exp_get_fsid_key(svc_client *clp, int fsid)
852 850
853 return exp_find_key(clp, FSID_NUM, fsidv, NULL); 851 return exp_find_key(clp, FSID_NUM, fsidv, NULL);
854} 852}
853#endif
855 854
856static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, 855static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
857 struct cache_req *reqp) 856 struct cache_req *reqp)
@@ -893,6 +892,7 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path)
893 return exp; 892 return exp;
894} 893}
895 894
895#ifdef CONFIG_NFSD_DEPRECATED
896/* 896/*
897 * Hashtable locking. Write locks are placed only by user processes 897 * Hashtable locking. Write locks are placed only by user processes
898 * wanting to modify export information. 898 * wanting to modify export information.
@@ -925,6 +925,19 @@ exp_writeunlock(void)
925{ 925{
926 up_write(&hash_sem); 926 up_write(&hash_sem);
927} 927}
928#else
929
930/* hash_sem not needed once deprecated interface is removed */
931void exp_readlock(void) {}
932static inline void exp_writelock(void){}
933void exp_readunlock(void) {}
934static inline void exp_writeunlock(void){}
935
936#endif
937
938#ifdef CONFIG_NFSD_DEPRECATED
939static void exp_do_unexport(svc_export *unexp);
940static int exp_verify_string(char *cp, int max);
928 941
929static void exp_fsid_unhash(struct svc_export *exp) 942static void exp_fsid_unhash(struct svc_export *exp)
930{ 943{
@@ -935,10 +948,9 @@ static void exp_fsid_unhash(struct svc_export *exp)
935 948
936 ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); 949 ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid);
937 if (!IS_ERR(ek)) { 950 if (!IS_ERR(ek)) {
938 ek->h.expiry_time = get_seconds()-1; 951 sunrpc_invalidate(&ek->h, &svc_expkey_cache);
939 cache_put(&ek->h, &svc_expkey_cache); 952 cache_put(&ek->h, &svc_expkey_cache);
940 } 953 }
941 svc_expkey_cache.nextcheck = get_seconds();
942} 954}
943 955
944static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) 956static int exp_fsid_hash(svc_client *clp, struct svc_export *exp)
@@ -973,10 +985,9 @@ static void exp_unhash(struct svc_export *exp)
973 985
974 ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); 986 ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
975 if (!IS_ERR(ek)) { 987 if (!IS_ERR(ek)) {
976 ek->h.expiry_time = get_seconds()-1; 988 sunrpc_invalidate(&ek->h, &svc_expkey_cache);
977 cache_put(&ek->h, &svc_expkey_cache); 989 cache_put(&ek->h, &svc_expkey_cache);
978 } 990 }
979 svc_expkey_cache.nextcheck = get_seconds();
980} 991}
981 992
982/* 993/*
@@ -1097,8 +1108,7 @@ out:
1097static void 1108static void
1098exp_do_unexport(svc_export *unexp) 1109exp_do_unexport(svc_export *unexp)
1099{ 1110{
1100 unexp->h.expiry_time = get_seconds()-1; 1111 sunrpc_invalidate(&unexp->h, &svc_export_cache);
1101 svc_export_cache.nextcheck = get_seconds();
1102 exp_unhash(unexp); 1112 exp_unhash(unexp);
1103 exp_fsid_unhash(unexp); 1113 exp_fsid_unhash(unexp);
1104} 1114}
@@ -1150,6 +1160,7 @@ out_unlock:
1150 exp_writeunlock(); 1160 exp_writeunlock();
1151 return err; 1161 return err;
1152} 1162}
1163#endif /* CONFIG_NFSD_DEPRECATED */
1153 1164
1154/* 1165/*
1155 * Obtain the root fh on behalf of a client. 1166 * Obtain the root fh on behalf of a client.
@@ -1459,25 +1470,43 @@ static void show_secinfo_flags(struct seq_file *m, int flags)
1459 show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); 1470 show_expflags(m, flags, NFSEXP_SECINFO_FLAGS);
1460} 1471}
1461 1472
1473static bool secinfo_flags_equal(int f, int g)
1474{
1475 f &= NFSEXP_SECINFO_FLAGS;
1476 g &= NFSEXP_SECINFO_FLAGS;
1477 return f == g;
1478}
1479
1480static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end)
1481{
1482 int flags;
1483
1484 flags = (*fp)->flags;
1485 seq_printf(m, ",sec=%d", (*fp)->pseudoflavor);
1486 (*fp)++;
1487 while (*fp != end && secinfo_flags_equal(flags, (*fp)->flags)) {
1488 seq_printf(m, ":%d", (*fp)->pseudoflavor);
1489 (*fp)++;
1490 }
1491 return flags;
1492}
1493
1462static void show_secinfo(struct seq_file *m, struct svc_export *exp) 1494static void show_secinfo(struct seq_file *m, struct svc_export *exp)
1463{ 1495{
1464 struct exp_flavor_info *f; 1496 struct exp_flavor_info *f;
1465 struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; 1497 struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
1466 int lastflags = 0, first = 0; 1498 int flags;
1467 1499
1468 if (exp->ex_nflavors == 0) 1500 if (exp->ex_nflavors == 0)
1469 return; 1501 return;
1470 for (f = exp->ex_flavors; f < end; f++) { 1502 f = exp->ex_flavors;
1471 if (first || f->flags != lastflags) { 1503 flags = show_secinfo_run(m, &f, end);
1472 if (!first) 1504 if (!secinfo_flags_equal(flags, exp->ex_flags))
1473 show_secinfo_flags(m, lastflags); 1505 show_secinfo_flags(m, flags);
1474 seq_printf(m, ",sec=%d", f->pseudoflavor); 1506 while (f != end) {
1475 lastflags = f->flags; 1507 flags = show_secinfo_run(m, &f, end);
1476 } else { 1508 show_secinfo_flags(m, flags);
1477 seq_printf(m, ":%d", f->pseudoflavor);
1478 }
1479 } 1509 }
1480 show_secinfo_flags(m, lastflags);
1481} 1510}
1482 1511
1483static void exp_flags(struct seq_file *m, int flag, int fsid, 1512static void exp_flags(struct seq_file *m, int flag, int fsid,
@@ -1532,6 +1561,7 @@ const struct seq_operations nfs_exports_op = {
1532 .show = e_show, 1561 .show = e_show,
1533}; 1562};
1534 1563
1564#ifdef CONFIG_NFSD_DEPRECATED
1535/* 1565/*
1536 * Add or modify a client. 1566 * Add or modify a client.
1537 * Change requests may involve the list of host addresses. The list of 1567 * Change requests may involve the list of host addresses. The list of
@@ -1563,7 +1593,7 @@ exp_addclient(struct nfsctl_client *ncp)
1563 /* Insert client into hashtable. */ 1593 /* Insert client into hashtable. */
1564 for (i = 0; i < ncp->cl_naddr; i++) { 1594 for (i = 0; i < ncp->cl_naddr; i++) {
1565 ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); 1595 ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6);
1566 auth_unix_add_addr(&addr6, dom); 1596 auth_unix_add_addr(&init_net, &addr6, dom);
1567 } 1597 }
1568 auth_unix_forget_old(dom); 1598 auth_unix_forget_old(dom);
1569 auth_domain_put(dom); 1599 auth_domain_put(dom);
@@ -1621,6 +1651,7 @@ exp_verify_string(char *cp, int max)
1621 printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); 1651 printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp);
1622 return 0; 1652 return 0;
1623} 1653}
1654#endif /* CONFIG_NFSD_DEPRECATED */
1624 1655
1625/* 1656/*
1626 * Initialize the exports module. 1657 * Initialize the exports module.
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 988cbb3a19b6..143da2eecd7b 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -41,7 +41,6 @@
41 41
42#define NFSPROC4_CB_NULL 0 42#define NFSPROC4_CB_NULL 0
43#define NFSPROC4_CB_COMPOUND 1 43#define NFSPROC4_CB_COMPOUND 1
44#define NFS4_STATEID_SIZE 16
45 44
46/* Index of predefined Linux callback client operations */ 45/* Index of predefined Linux callback client operations */
47 46
@@ -248,10 +247,11 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp,
248} 247}
249 248
250static void 249static void
251encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, 250encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
252 struct nfs4_cb_compound_hdr *hdr) 251 struct nfs4_cb_compound_hdr *hdr)
253{ 252{
254 __be32 *p; 253 __be32 *p;
254 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
255 255
256 if (hdr->minorversion == 0) 256 if (hdr->minorversion == 0)
257 return; 257 return;
@@ -259,8 +259,8 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args,
259 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); 259 RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20);
260 260
261 WRITE32(OP_CB_SEQUENCE); 261 WRITE32(OP_CB_SEQUENCE);
262 WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); 262 WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN);
263 WRITE32(args->cbs_clp->cl_cb_seq_nr); 263 WRITE32(ses->se_cb_seq_nr);
264 WRITE32(0); /* slotid, always 0 */ 264 WRITE32(0); /* slotid, always 0 */
265 WRITE32(0); /* highest slotid always 0 */ 265 WRITE32(0); /* highest slotid always 0 */
266 WRITE32(0); /* cachethis always 0 */ 266 WRITE32(0); /* cachethis always 0 */
@@ -280,18 +280,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p)
280 280
281static int 281static int
282nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, 282nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p,
283 struct nfs4_rpc_args *rpc_args) 283 struct nfsd4_callback *cb)
284{ 284{
285 struct xdr_stream xdr; 285 struct xdr_stream xdr;
286 struct nfs4_delegation *args = rpc_args->args_op; 286 struct nfs4_delegation *args = cb->cb_op;
287 struct nfs4_cb_compound_hdr hdr = { 287 struct nfs4_cb_compound_hdr hdr = {
288 .ident = args->dl_ident, 288 .ident = cb->cb_clp->cl_cb_ident,
289 .minorversion = rpc_args->args_seq.cbs_minorversion, 289 .minorversion = cb->cb_minorversion,
290 }; 290 };
291 291
292 xdr_init_encode(&xdr, &req->rq_snd_buf, p); 292 xdr_init_encode(&xdr, &req->rq_snd_buf, p);
293 encode_cb_compound_hdr(&xdr, &hdr); 293 encode_cb_compound_hdr(&xdr, &hdr);
294 encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr); 294 encode_cb_sequence(&xdr, cb, &hdr);
295 encode_cb_recall(&xdr, args, &hdr); 295 encode_cb_recall(&xdr, args, &hdr);
296 encode_cb_nops(&hdr); 296 encode_cb_nops(&hdr);
297 return 0; 297 return 0;
@@ -339,15 +339,16 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
339 * with a single slot. 339 * with a single slot.
340 */ 340 */
341static int 341static int
342decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, 342decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb,
343 struct rpc_rqst *rqstp) 343 struct rpc_rqst *rqstp)
344{ 344{
345 struct nfsd4_session *ses = cb->cb_clp->cl_cb_session;
345 struct nfs4_sessionid id; 346 struct nfs4_sessionid id;
346 int status; 347 int status;
347 u32 dummy; 348 u32 dummy;
348 __be32 *p; 349 __be32 *p;
349 350
350 if (res->cbs_minorversion == 0) 351 if (cb->cb_minorversion == 0)
351 return 0; 352 return 0;
352 353
353 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); 354 status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE);
@@ -363,13 +364,12 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res,
363 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); 364 READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
364 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); 365 memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN);
365 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); 366 p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN);
366 if (memcmp(id.data, res->cbs_clp->cl_sessionid.data, 367 if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) {
367 NFS4_MAX_SESSIONID_LEN)) {
368 dprintk("%s Invalid session id\n", __func__); 368 dprintk("%s Invalid session id\n", __func__);
369 goto out; 369 goto out;
370 } 370 }
371 READ32(dummy); 371 READ32(dummy);
372 if (dummy != res->cbs_clp->cl_cb_seq_nr) { 372 if (dummy != ses->se_cb_seq_nr) {
373 dprintk("%s Invalid sequence number\n", __func__); 373 dprintk("%s Invalid sequence number\n", __func__);
374 goto out; 374 goto out;
375 } 375 }
@@ -393,7 +393,7 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p)
393 393
394static int 394static int
395nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, 395nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
396 struct nfsd4_cb_sequence *seq) 396 struct nfsd4_callback *cb)
397{ 397{
398 struct xdr_stream xdr; 398 struct xdr_stream xdr;
399 struct nfs4_cb_compound_hdr hdr; 399 struct nfs4_cb_compound_hdr hdr;
@@ -403,8 +403,8 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p,
403 status = decode_cb_compound_hdr(&xdr, &hdr); 403 status = decode_cb_compound_hdr(&xdr, &hdr);
404 if (status) 404 if (status)
405 goto out; 405 goto out;
406 if (seq) { 406 if (cb) {
407 status = decode_cb_sequence(&xdr, seq, rqstp); 407 status = decode_cb_sequence(&xdr, cb, rqstp);
408 if (status) 408 if (status)
409 goto out; 409 goto out;
410 } 410 }
@@ -473,30 +473,34 @@ static int max_cb_time(void)
473/* Reference counting, callback cleanup, etc., all look racy as heck. 473/* Reference counting, callback cleanup, etc., all look racy as heck.
474 * And why is cl_cb_set an atomic? */ 474 * And why is cl_cb_set an atomic? */
475 475
476int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) 476int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
477{ 477{
478 struct rpc_timeout timeparms = { 478 struct rpc_timeout timeparms = {
479 .to_initval = max_cb_time(), 479 .to_initval = max_cb_time(),
480 .to_retries = 0, 480 .to_retries = 0,
481 }; 481 };
482 struct rpc_create_args args = { 482 struct rpc_create_args args = {
483 .protocol = XPRT_TRANSPORT_TCP, 483 .net = &init_net,
484 .address = (struct sockaddr *) &cb->cb_addr, 484 .address = (struct sockaddr *) &conn->cb_addr,
485 .addrsize = cb->cb_addrlen, 485 .addrsize = conn->cb_addrlen,
486 .timeout = &timeparms, 486 .timeout = &timeparms,
487 .program = &cb_program, 487 .program = &cb_program,
488 .prognumber = cb->cb_prog,
489 .version = 0, 488 .version = 0,
490 .authflavor = clp->cl_flavor, 489 .authflavor = clp->cl_flavor,
491 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), 490 .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET),
492 .client_name = clp->cl_principal,
493 }; 491 };
494 struct rpc_clnt *client; 492 struct rpc_clnt *client;
495 493
496 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) 494 if (clp->cl_minorversion == 0) {
497 return -EINVAL; 495 if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
498 if (cb->cb_minorversion) { 496 return -EINVAL;
499 args.bc_xprt = cb->cb_xprt; 497 args.client_name = clp->cl_principal;
498 args.prognumber = conn->cb_prog,
499 args.protocol = XPRT_TRANSPORT_TCP;
500 clp->cl_cb_ident = conn->cb_ident;
501 } else {
502 args.bc_xprt = conn->cb_xprt;
503 args.prognumber = clp->cl_cb_session->se_cb_prog;
500 args.protocol = XPRT_TRANSPORT_BC_TCP; 504 args.protocol = XPRT_TRANSPORT_BC_TCP;
501 } 505 }
502 /* Create RPC client */ 506 /* Create RPC client */
@@ -506,7 +510,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
506 PTR_ERR(client)); 510 PTR_ERR(client));
507 return PTR_ERR(client); 511 return PTR_ERR(client);
508 } 512 }
509 nfsd4_set_callback_client(clp, client); 513 clp->cl_cb_client = client;
510 return 0; 514 return 0;
511 515
512} 516}
@@ -519,7 +523,7 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason)
519 523
520static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) 524static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
521{ 525{
522 struct nfs4_client *clp = calldata; 526 struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
523 527
524 if (task->tk_status) 528 if (task->tk_status)
525 warn_no_callback_path(clp, task->tk_status); 529 warn_no_callback_path(clp, task->tk_status);
@@ -528,6 +532,8 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
528} 532}
529 533
530static const struct rpc_call_ops nfsd4_cb_probe_ops = { 534static const struct rpc_call_ops nfsd4_cb_probe_ops = {
535 /* XXX: release method to ensure we set the cb channel down if
536 * necessary on early failure? */
531 .rpc_call_done = nfsd4_cb_probe_done, 537 .rpc_call_done = nfsd4_cb_probe_done,
532}; 538};
533 539
@@ -543,38 +549,42 @@ int set_callback_cred(void)
543 return 0; 549 return 0;
544} 550}
545 551
552static struct workqueue_struct *callback_wq;
546 553
547void do_probe_callback(struct nfs4_client *clp) 554static void do_probe_callback(struct nfs4_client *clp)
548{ 555{
549 struct rpc_message msg = { 556 struct nfsd4_callback *cb = &clp->cl_cb_null;
550 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
551 .rpc_argp = clp,
552 .rpc_cred = callback_cred
553 };
554 int status;
555 557
556 status = rpc_call_async(clp->cl_cb_client, &msg, 558 cb->cb_op = NULL;
557 RPC_TASK_SOFT | RPC_TASK_SOFTCONN, 559 cb->cb_clp = clp;
558 &nfsd4_cb_probe_ops, (void *)clp); 560
559 if (status) 561 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
560 warn_no_callback_path(clp, status); 562 cb->cb_msg.rpc_argp = NULL;
563 cb->cb_msg.rpc_resp = NULL;
564 cb->cb_msg.rpc_cred = callback_cred;
565
566 cb->cb_ops = &nfsd4_cb_probe_ops;
567
568 queue_work(callback_wq, &cb->cb_work);
561} 569}
562 570
563/* 571/*
564 * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... 572 * Poke the callback thread to process any updates to the callback
573 * parameters, and send a null probe.
565 */ 574 */
566void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb) 575void nfsd4_probe_callback(struct nfs4_client *clp)
567{ 576{
568 int status; 577 set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
578 do_probe_callback(clp);
579}
569 580
581void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn)
582{
570 BUG_ON(atomic_read(&clp->cl_cb_set)); 583 BUG_ON(atomic_read(&clp->cl_cb_set));
571 584
572 status = setup_callback_client(clp, cb); 585 spin_lock(&clp->cl_lock);
573 if (status) { 586 memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn));
574 warn_no_callback_path(clp, status); 587 spin_unlock(&clp->cl_lock);
575 return;
576 }
577 do_probe_callback(clp);
578} 588}
579 589
580/* 590/*
@@ -585,8 +595,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb)
585static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, 595static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
586 struct rpc_task *task) 596 struct rpc_task *task)
587{ 597{
588 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; 598 u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data;
589 u32 *ptr = (u32 *)clp->cl_sessionid.data;
590 int status = 0; 599 int status = 0;
591 600
592 dprintk("%s: %u:%u:%u:%u\n", __func__, 601 dprintk("%s: %u:%u:%u:%u\n", __func__,
@@ -598,14 +607,6 @@ static int nfsd41_cb_setup_sequence(struct nfs4_client *clp,
598 status = -EAGAIN; 607 status = -EAGAIN;
599 goto out; 608 goto out;
600 } 609 }
601
602 /*
603 * We'll need the clp during XDR encoding and decoding,
604 * and the sequence during decoding to verify the reply
605 */
606 args->args_seq.cbs_clp = clp;
607 task->tk_msg.rpc_resp = &args->args_seq;
608
609out: 610out:
610 dprintk("%s status=%d\n", __func__, status); 611 dprintk("%s status=%d\n", __func__, status);
611 return status; 612 return status;
@@ -617,13 +618,13 @@ out:
617 */ 618 */
618static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) 619static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
619{ 620{
620 struct nfs4_delegation *dp = calldata; 621 struct nfsd4_callback *cb = calldata;
622 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
621 struct nfs4_client *clp = dp->dl_client; 623 struct nfs4_client *clp = dp->dl_client;
622 struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; 624 u32 minorversion = clp->cl_minorversion;
623 u32 minorversion = clp->cl_cb_conn.cb_minorversion;
624 int status = 0; 625 int status = 0;
625 626
626 args->args_seq.cbs_minorversion = minorversion; 627 cb->cb_minorversion = minorversion;
627 if (minorversion) { 628 if (minorversion) {
628 status = nfsd41_cb_setup_sequence(clp, task); 629 status = nfsd41_cb_setup_sequence(clp, task);
629 if (status) { 630 if (status) {
@@ -640,19 +641,20 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
640 641
641static void nfsd4_cb_done(struct rpc_task *task, void *calldata) 642static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
642{ 643{
643 struct nfs4_delegation *dp = calldata; 644 struct nfsd4_callback *cb = calldata;
645 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
644 struct nfs4_client *clp = dp->dl_client; 646 struct nfs4_client *clp = dp->dl_client;
645 647
646 dprintk("%s: minorversion=%d\n", __func__, 648 dprintk("%s: minorversion=%d\n", __func__,
647 clp->cl_cb_conn.cb_minorversion); 649 clp->cl_minorversion);
648 650
649 if (clp->cl_cb_conn.cb_minorversion) { 651 if (clp->cl_minorversion) {
650 /* No need for lock, access serialized in nfsd4_cb_prepare */ 652 /* No need for lock, access serialized in nfsd4_cb_prepare */
651 ++clp->cl_cb_seq_nr; 653 ++clp->cl_cb_session->se_cb_seq_nr;
652 clear_bit(0, &clp->cl_cb_slot_busy); 654 clear_bit(0, &clp->cl_cb_slot_busy);
653 rpc_wake_up_next(&clp->cl_cb_waitq); 655 rpc_wake_up_next(&clp->cl_cb_waitq);
654 dprintk("%s: freed slot, new seqid=%d\n", __func__, 656 dprintk("%s: freed slot, new seqid=%d\n", __func__,
655 clp->cl_cb_seq_nr); 657 clp->cl_cb_session->se_cb_seq_nr);
656 658
657 /* We're done looking into the sequence information */ 659 /* We're done looking into the sequence information */
658 task->tk_msg.rpc_resp = NULL; 660 task->tk_msg.rpc_resp = NULL;
@@ -662,7 +664,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
662 664
663static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) 665static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
664{ 666{
665 struct nfs4_delegation *dp = calldata; 667 struct nfsd4_callback *cb = calldata;
668 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
666 struct nfs4_client *clp = dp->dl_client; 669 struct nfs4_client *clp = dp->dl_client;
667 struct rpc_clnt *current_rpc_client = clp->cl_cb_client; 670 struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
668 671
@@ -707,7 +710,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
707 710
708static void nfsd4_cb_recall_release(void *calldata) 711static void nfsd4_cb_recall_release(void *calldata)
709{ 712{
710 struct nfs4_delegation *dp = calldata; 713 struct nfsd4_callback *cb = calldata;
714 struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
711 715
712 nfs4_put_delegation(dp); 716 nfs4_put_delegation(dp);
713} 717}
@@ -718,8 +722,6 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = {
718 .rpc_release = nfsd4_cb_recall_release, 722 .rpc_release = nfsd4_cb_recall_release,
719}; 723};
720 724
721static struct workqueue_struct *callback_wq;
722
723int nfsd4_create_callback_queue(void) 725int nfsd4_create_callback_queue(void)
724{ 726{
725 callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); 727 callback_wq = create_singlethread_workqueue("nfsd4_callbacks");
@@ -734,57 +736,88 @@ void nfsd4_destroy_callback_queue(void)
734} 736}
735 737
736/* must be called under the state lock */ 738/* must be called under the state lock */
737void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) 739void nfsd4_shutdown_callback(struct nfs4_client *clp)
738{ 740{
739 struct rpc_clnt *old = clp->cl_cb_client; 741 set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags);
740
741 clp->cl_cb_client = new;
742 /* 742 /*
743 * After this, any work that saw the old value of cl_cb_client will 743 * Note this won't actually result in a null callback;
744 * be gone: 744 * instead, nfsd4_do_callback_rpc() will detect the killed
745 * client, destroy the rpc client, and stop:
745 */ 746 */
747 do_probe_callback(clp);
746 flush_workqueue(callback_wq); 748 flush_workqueue(callback_wq);
747 /* So we can safely shut it down: */
748 if (old)
749 rpc_shutdown_client(old);
750} 749}
751 750
752/* 751void nfsd4_release_cb(struct nfsd4_callback *cb)
753 * called with dp->dl_count inc'ed.
754 */
755static void _nfsd4_cb_recall(struct nfs4_delegation *dp)
756{ 752{
757 struct nfs4_client *clp = dp->dl_client; 753 if (cb->cb_ops->rpc_release)
758 struct rpc_clnt *clnt = clp->cl_cb_client; 754 cb->cb_ops->rpc_release(cb);
759 struct nfs4_rpc_args *args = &dp->dl_recall.cb_args; 755}
760 struct rpc_message msg = {
761 .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
762 .rpc_cred = callback_cred
763 };
764 756
765 if (clnt == NULL) { 757void nfsd4_process_cb_update(struct nfsd4_callback *cb)
766 nfs4_put_delegation(dp); 758{
767 return; /* Client is shutting down; give up. */ 759 struct nfs4_cb_conn conn;
760 struct nfs4_client *clp = cb->cb_clp;
761 int err;
762
763 /*
764 * This is either an update, or the client dying; in either case,
765 * kill the old client:
766 */
767 if (clp->cl_cb_client) {
768 rpc_shutdown_client(clp->cl_cb_client);
769 clp->cl_cb_client = NULL;
768 } 770 }
771 if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags))
772 return;
773 spin_lock(&clp->cl_lock);
774 /*
775 * Only serialized callback code is allowed to clear these
776 * flags; main nfsd code can only set them:
777 */
778 BUG_ON(!clp->cl_cb_flags);
779 clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags);
780 memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
781 spin_unlock(&clp->cl_lock);
769 782
770 args->args_op = dp; 783 err = setup_callback_client(clp, &conn);
771 msg.rpc_argp = args; 784 if (err)
772 dp->dl_retries = 1; 785 warn_no_callback_path(clp, err);
773 rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp);
774} 786}
775 787
776void nfsd4_do_callback_rpc(struct work_struct *w) 788void nfsd4_do_callback_rpc(struct work_struct *w)
777{ 789{
778 /* XXX: for now, just send off delegation recall. */ 790 struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work);
779 /* In future, generalize to handle any sort of callback. */ 791 struct nfs4_client *clp = cb->cb_clp;
780 struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work); 792 struct rpc_clnt *clnt;
781 struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall);
782 793
783 _nfsd4_cb_recall(dp); 794 if (clp->cl_cb_flags)
784} 795 nfsd4_process_cb_update(cb);
785 796
797 clnt = clp->cl_cb_client;
798 if (!clnt) {
799 /* Callback channel broken, or client killed; give up: */
800 nfsd4_release_cb(cb);
801 return;
802 }
803 rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
804 cb->cb_ops, cb);
805}
786 806
787void nfsd4_cb_recall(struct nfs4_delegation *dp) 807void nfsd4_cb_recall(struct nfs4_delegation *dp)
788{ 808{
809 struct nfsd4_callback *cb = &dp->dl_recall;
810
811 dp->dl_retries = 1;
812 cb->cb_op = dp;
813 cb->cb_clp = dp->dl_client;
814 cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL];
815 cb->cb_msg.rpc_argp = cb;
816 cb->cb_msg.rpc_resp = cb;
817 cb->cb_msg.rpc_cred = callback_cred;
818
819 cb->cb_ops = &nfsd4_cb_recall_ops;
820 dp->dl_retries = 1;
821
789 queue_work(callback_wq, &dp->dl_recall.cb_work); 822 queue_work(callback_wq, &dp->dl_recall.cb_work);
790} 823}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index c78dbf493424..f0695e815f0e 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -482,109 +482,26 @@ nfsd_idmap_shutdown(void)
482 cache_unregister(&nametoid_cache); 482 cache_unregister(&nametoid_cache);
483} 483}
484 484
485/*
486 * Deferred request handling
487 */
488
489struct idmap_defer_req {
490 struct cache_req req;
491 struct cache_deferred_req deferred_req;
492 wait_queue_head_t waitq;
493 atomic_t count;
494};
495
496static inline void
497put_mdr(struct idmap_defer_req *mdr)
498{
499 if (atomic_dec_and_test(&mdr->count))
500 kfree(mdr);
501}
502
503static inline void
504get_mdr(struct idmap_defer_req *mdr)
505{
506 atomic_inc(&mdr->count);
507}
508
509static void
510idmap_revisit(struct cache_deferred_req *dreq, int toomany)
511{
512 struct idmap_defer_req *mdr =
513 container_of(dreq, struct idmap_defer_req, deferred_req);
514
515 wake_up(&mdr->waitq);
516 put_mdr(mdr);
517}
518
519static struct cache_deferred_req *
520idmap_defer(struct cache_req *req)
521{
522 struct idmap_defer_req *mdr =
523 container_of(req, struct idmap_defer_req, req);
524
525 mdr->deferred_req.revisit = idmap_revisit;
526 get_mdr(mdr);
527 return (&mdr->deferred_req);
528}
529
530static inline int
531do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key,
532 struct cache_detail *detail, struct ent **item,
533 struct idmap_defer_req *mdr)
534{
535 *item = lookup_fn(key);
536 if (!*item)
537 return -ENOMEM;
538 return cache_check(detail, &(*item)->h, &mdr->req);
539}
540
541static inline int
542do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *),
543 struct ent *key, struct cache_detail *detail,
544 struct ent **item)
545{
546 int ret = -ENOMEM;
547
548 *item = lookup_fn(key);
549 if (!*item)
550 goto out_err;
551 ret = -ETIMEDOUT;
552 if (!test_bit(CACHE_VALID, &(*item)->h.flags)
553 || (*item)->h.expiry_time < get_seconds()
554 || detail->flush_time > (*item)->h.last_refresh)
555 goto out_put;
556 ret = -ENOENT;
557 if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
558 goto out_put;
559 return 0;
560out_put:
561 cache_put(&(*item)->h, detail);
562out_err:
563 *item = NULL;
564 return ret;
565}
566
567static int 485static int
568idmap_lookup(struct svc_rqst *rqstp, 486idmap_lookup(struct svc_rqst *rqstp,
569 struct ent *(*lookup_fn)(struct ent *), struct ent *key, 487 struct ent *(*lookup_fn)(struct ent *), struct ent *key,
570 struct cache_detail *detail, struct ent **item) 488 struct cache_detail *detail, struct ent **item)
571{ 489{
572 struct idmap_defer_req *mdr;
573 int ret; 490 int ret;
574 491
575 mdr = kzalloc(sizeof(*mdr), GFP_KERNEL); 492 *item = lookup_fn(key);
576 if (!mdr) 493 if (!*item)
577 return -ENOMEM; 494 return -ENOMEM;
578 atomic_set(&mdr->count, 1); 495 retry:
579 init_waitqueue_head(&mdr->waitq); 496 ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle);
580 mdr->req.defer = idmap_defer; 497
581 ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr); 498 if (ret == -ETIMEDOUT) {
582 if (ret == -EAGAIN) { 499 struct ent *prev_item = *item;
583 wait_event_interruptible_timeout(mdr->waitq, 500 *item = lookup_fn(key);
584 test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ); 501 if (*item != prev_item)
585 ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item); 502 goto retry;
503 cache_put(&(*item)->h, detail);
586 } 504 }
587 put_mdr(mdr);
588 return ret; 505 return ret;
589} 506}
590 507
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 59ec449b0c7f..0cdfd022bb7b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1031,8 +1031,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
1031 resp->cstate.session = NULL; 1031 resp->cstate.session = NULL;
1032 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); 1032 fh_init(&resp->cstate.current_fh, NFS4_FHSIZE);
1033 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); 1033 fh_init(&resp->cstate.save_fh, NFS4_FHSIZE);
1034 /* Use the deferral mechanism only for NFSv4.0 compounds */ 1034 /*
1035 rqstp->rq_usedeferral = (args->minorversion == 0); 1035 * Don't use the deferral mechanism for NFSv4; compounds make it
1036 * too hard to avoid non-idempotency problems.
1037 */
1038 rqstp->rq_usedeferral = 0;
1036 1039
1037 /* 1040 /*
1038 * According to RFC3010, this takes precedence over all other errors. 1041 * According to RFC3010, this takes precedence over all other errors.
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a7292fcf7718..9019e8ec9dc8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -207,7 +207,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
207{ 207{
208 struct nfs4_delegation *dp; 208 struct nfs4_delegation *dp;
209 struct nfs4_file *fp = stp->st_file; 209 struct nfs4_file *fp = stp->st_file;
210 struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn;
211 210
212 dprintk("NFSD alloc_init_deleg\n"); 211 dprintk("NFSD alloc_init_deleg\n");
213 /* 212 /*
@@ -234,7 +233,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
234 nfs4_file_get_access(fp, O_RDONLY); 233 nfs4_file_get_access(fp, O_RDONLY);
235 dp->dl_flock = NULL; 234 dp->dl_flock = NULL;
236 dp->dl_type = type; 235 dp->dl_type = type;
237 dp->dl_ident = cb->cb_ident;
238 dp->dl_stateid.si_boot = boot_time; 236 dp->dl_stateid.si_boot = boot_time;
239 dp->dl_stateid.si_stateownerid = current_delegid++; 237 dp->dl_stateid.si_stateownerid = current_delegid++;
240 dp->dl_stateid.si_fileid = 0; 238 dp->dl_stateid.si_fileid = 0;
@@ -535,171 +533,258 @@ gen_sessionid(struct nfsd4_session *ses)
535 */ 533 */
536#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) 534#define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44)
537 535
536static void
537free_session_slots(struct nfsd4_session *ses)
538{
539 int i;
540
541 for (i = 0; i < ses->se_fchannel.maxreqs; i++)
542 kfree(ses->se_slots[i]);
543}
544
538/* 545/*
539 * Give the client the number of ca_maxresponsesize_cached slots it 546 * We don't actually need to cache the rpc and session headers, so we
540 * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, 547 * can allocate a little less for each slot:
541 * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more 548 */
542 * than NFSD_MAX_SLOTS_PER_SESSION. 549static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
543 * 550{
544 * If we run out of reserved DRC memory we should (up to a point) 551 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
552}
553
554static int nfsd4_sanitize_slot_size(u32 size)
555{
556 size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */
557 size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE);
558
559 return size;
560}
561
562/*
563 * XXX: If we run out of reserved DRC memory we could (up to a point)
545 * re-negotiate active sessions and reduce their slot usage to make 564 * re-negotiate active sessions and reduce their slot usage to make
546 * rooom for new connections. For now we just fail the create session. 565 * rooom for new connections. For now we just fail the create session.
547 */ 566 */
548static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) 567static int nfsd4_get_drc_mem(int slotsize, u32 num)
549{ 568{
550 int mem, size = fchan->maxresp_cached; 569 int avail;
551 570
552 if (fchan->maxreqs < 1) 571 num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
553 return nfserr_inval;
554 572
555 if (size < NFSD_MIN_HDR_SEQ_SZ) 573 spin_lock(&nfsd_drc_lock);
556 size = NFSD_MIN_HDR_SEQ_SZ; 574 avail = min_t(int, NFSD_MAX_MEM_PER_SESSION,
557 size -= NFSD_MIN_HDR_SEQ_SZ; 575 nfsd_drc_max_mem - nfsd_drc_mem_used);
558 if (size > NFSD_SLOT_CACHE_SIZE) 576 num = min_t(int, num, avail / slotsize);
559 size = NFSD_SLOT_CACHE_SIZE; 577 nfsd_drc_mem_used += num * slotsize;
560 578 spin_unlock(&nfsd_drc_lock);
561 /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */
562 mem = fchan->maxreqs * size;
563 if (mem > NFSD_MAX_MEM_PER_SESSION) {
564 fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size;
565 if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION)
566 fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION;
567 mem = fchan->maxreqs * size;
568 }
569 579
580 return num;
581}
582
583static void nfsd4_put_drc_mem(int slotsize, int num)
584{
570 spin_lock(&nfsd_drc_lock); 585 spin_lock(&nfsd_drc_lock);
571 /* bound the total session drc memory ussage */ 586 nfsd_drc_mem_used -= slotsize * num;
572 if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) {
573 fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size;
574 mem = fchan->maxreqs * size;
575 }
576 nfsd_drc_mem_used += mem;
577 spin_unlock(&nfsd_drc_lock); 587 spin_unlock(&nfsd_drc_lock);
588}
578 589
579 if (fchan->maxreqs == 0) 590static struct nfsd4_session *alloc_session(int slotsize, int numslots)
580 return nfserr_jukebox; 591{
592 struct nfsd4_session *new;
593 int mem, i;
581 594
582 fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; 595 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
583 return 0; 596 + sizeof(struct nfsd4_session) > PAGE_SIZE);
597 mem = numslots * sizeof(struct nfsd4_slot *);
598
599 new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
600 if (!new)
601 return NULL;
602 /* allocate each struct nfsd4_slot and data cache in one piece */
603 for (i = 0; i < numslots; i++) {
604 mem = sizeof(struct nfsd4_slot) + slotsize;
605 new->se_slots[i] = kzalloc(mem, GFP_KERNEL);
606 if (!new->se_slots[i])
607 goto out_free;
608 }
609 return new;
610out_free:
611 while (i--)
612 kfree(new->se_slots[i]);
613 kfree(new);
614 return NULL;
584} 615}
585 616
586/* 617static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize)
587 * fchan holds the client values on input, and the server values on output
588 * sv_max_mesg is the maximum payload plus one page for overhead.
589 */
590static int init_forechannel_attrs(struct svc_rqst *rqstp,
591 struct nfsd4_channel_attrs *session_fchan,
592 struct nfsd4_channel_attrs *fchan)
593{ 618{
594 int status = 0; 619 u32 maxrpc = nfsd_serv->sv_max_mesg;
595 __u32 maxcount = nfsd_serv->sv_max_mesg;
596 620
597 /* headerpadsz set to zero in encode routine */ 621 new->maxreqs = numslots;
622 new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ;
623 new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc);
624 new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc);
625 new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND);
626}
598 627
599 /* Use the client's max request and max response size if possible */ 628static void free_conn(struct nfsd4_conn *c)
600 if (fchan->maxreq_sz > maxcount) 629{
601 fchan->maxreq_sz = maxcount; 630 svc_xprt_put(c->cn_xprt);
602 session_fchan->maxreq_sz = fchan->maxreq_sz; 631 kfree(c);
632}
603 633
604 if (fchan->maxresp_sz > maxcount) 634static void nfsd4_conn_lost(struct svc_xpt_user *u)
605 fchan->maxresp_sz = maxcount; 635{
606 session_fchan->maxresp_sz = fchan->maxresp_sz; 636 struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
637 struct nfs4_client *clp = c->cn_session->se_client;
607 638
608 /* Use the client's maxops if possible */ 639 spin_lock(&clp->cl_lock);
609 if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) 640 if (!list_empty(&c->cn_persession)) {
610 fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; 641 list_del(&c->cn_persession);
611 session_fchan->maxops = fchan->maxops; 642 free_conn(c);
643 }
644 spin_unlock(&clp->cl_lock);
645}
612 646
613 /* FIXME: Error means no more DRC pages so the server should 647static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
614 * recover pages from existing sessions. For now fail session 648{
615 * creation. 649 struct nfsd4_conn *conn;
616 */
617 status = set_forechannel_drc_size(fchan);
618 650
619 session_fchan->maxresp_cached = fchan->maxresp_cached; 651 conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL);
620 session_fchan->maxreqs = fchan->maxreqs; 652 if (!conn)
653 return NULL;
654 svc_xprt_get(rqstp->rq_xprt);
655 conn->cn_xprt = rqstp->rq_xprt;
656 conn->cn_flags = flags;
657 INIT_LIST_HEAD(&conn->cn_xpt_user.list);
658 return conn;
659}
621 660
622 dprintk("%s status %d\n", __func__, status); 661static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
623 return status; 662{
663 conn->cn_session = ses;
664 list_add(&conn->cn_persession, &ses->se_conns);
624} 665}
625 666
626static void 667static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
627free_session_slots(struct nfsd4_session *ses)
628{ 668{
629 int i; 669 struct nfs4_client *clp = ses->se_client;
630 670
631 for (i = 0; i < ses->se_fchannel.maxreqs; i++) 671 spin_lock(&clp->cl_lock);
632 kfree(ses->se_slots[i]); 672 __nfsd4_hash_conn(conn, ses);
673 spin_unlock(&clp->cl_lock);
633} 674}
634 675
635/* 676static void nfsd4_register_conn(struct nfsd4_conn *conn)
636 * We don't actually need to cache the rpc and session headers, so we
637 * can allocate a little less for each slot:
638 */
639static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
640{ 677{
641 return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; 678 conn->cn_xpt_user.callback = nfsd4_conn_lost;
679 register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
642} 680}
643 681
644static int 682static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
645alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
646 struct nfsd4_create_session *cses)
647{ 683{
648 struct nfsd4_session *new, tmp; 684 struct nfsd4_conn *conn;
649 struct nfsd4_slot *sp; 685 u32 flags = NFS4_CDFC4_FORE;
650 int idx, slotsize, cachesize, i;
651 int status;
652 686
653 memset(&tmp, 0, sizeof(tmp)); 687 if (ses->se_flags & SESSION4_BACK_CHAN)
688 flags |= NFS4_CDFC4_BACK;
689 conn = alloc_conn(rqstp, flags);
690 if (!conn)
691 return nfserr_jukebox;
692 nfsd4_hash_conn(conn, ses);
693 nfsd4_register_conn(conn);
694 return nfs_ok;
695}
654 696
655 /* FIXME: For now, we just accept the client back channel attributes. */ 697static void nfsd4_del_conns(struct nfsd4_session *s)
656 tmp.se_bchannel = cses->back_channel; 698{
657 status = init_forechannel_attrs(rqstp, &tmp.se_fchannel, 699 struct nfs4_client *clp = s->se_client;
658 &cses->fore_channel); 700 struct nfsd4_conn *c;
659 if (status)
660 goto out;
661 701
662 BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) 702 spin_lock(&clp->cl_lock);
663 + sizeof(struct nfsd4_session) > PAGE_SIZE); 703 while (!list_empty(&s->se_conns)) {
704 c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession);
705 list_del_init(&c->cn_persession);
706 spin_unlock(&clp->cl_lock);
664 707
665 status = nfserr_jukebox; 708 unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user);
666 /* allocate struct nfsd4_session and slot table pointers in one piece */ 709 free_conn(c);
667 slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *);
668 new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL);
669 if (!new)
670 goto out;
671 710
672 memcpy(new, &tmp, sizeof(*new)); 711 spin_lock(&clp->cl_lock);
712 }
713 spin_unlock(&clp->cl_lock);
714}
673 715
674 /* allocate each struct nfsd4_slot and data cache in one piece */ 716void free_session(struct kref *kref)
675 cachesize = slot_bytes(&new->se_fchannel); 717{
676 for (i = 0; i < new->se_fchannel.maxreqs; i++) { 718 struct nfsd4_session *ses;
677 sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); 719 int mem;
678 if (!sp) 720
679 goto out_free; 721 ses = container_of(kref, struct nfsd4_session, se_ref);
680 new->se_slots[i] = sp; 722 nfsd4_del_conns(ses);
723 spin_lock(&nfsd_drc_lock);
724 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
725 nfsd_drc_mem_used -= mem;
726 spin_unlock(&nfsd_drc_lock);
727 free_session_slots(ses);
728 kfree(ses);
729}
730
731static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses)
732{
733 struct nfsd4_session *new;
734 struct nfsd4_channel_attrs *fchan = &cses->fore_channel;
735 int numslots, slotsize;
736 int status;
737 int idx;
738
739 /*
740 * Note decreasing slot size below client's request may
741 * make it difficult for client to function correctly, whereas
742 * decreasing the number of slots will (just?) affect
743 * performance. When short on memory we therefore prefer to
744 * decrease number of slots instead of their size.
745 */
746 slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached);
747 numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs);
748
749 new = alloc_session(slotsize, numslots);
750 if (!new) {
751 nfsd4_put_drc_mem(slotsize, fchan->maxreqs);
752 return NULL;
681 } 753 }
754 init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize);
682 755
683 new->se_client = clp; 756 new->se_client = clp;
684 gen_sessionid(new); 757 gen_sessionid(new);
685 idx = hash_sessionid(&new->se_sessionid);
686 memcpy(clp->cl_sessionid.data, new->se_sessionid.data,
687 NFS4_MAX_SESSIONID_LEN);
688 758
759 INIT_LIST_HEAD(&new->se_conns);
760
761 new->se_cb_seq_nr = 1;
689 new->se_flags = cses->flags; 762 new->se_flags = cses->flags;
763 new->se_cb_prog = cses->callback_prog;
690 kref_init(&new->se_ref); 764 kref_init(&new->se_ref);
765 idx = hash_sessionid(&new->se_sessionid);
691 spin_lock(&client_lock); 766 spin_lock(&client_lock);
692 list_add(&new->se_hash, &sessionid_hashtbl[idx]); 767 list_add(&new->se_hash, &sessionid_hashtbl[idx]);
693 list_add(&new->se_perclnt, &clp->cl_sessions); 768 list_add(&new->se_perclnt, &clp->cl_sessions);
694 spin_unlock(&client_lock); 769 spin_unlock(&client_lock);
695 770
696 status = nfs_ok; 771 status = nfsd4_new_conn(rqstp, new);
697out: 772 /* whoops: benny points out, status is ignored! (err, or bogus) */
698 return status; 773 if (status) {
699out_free: 774 free_session(&new->se_ref);
700 free_session_slots(new); 775 return NULL;
701 kfree(new); 776 }
702 goto out; 777 if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) {
778 struct sockaddr *sa = svc_addr(rqstp);
779
780 clp->cl_cb_session = new;
781 clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
782 svc_xprt_get(rqstp->rq_xprt);
783 rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
784 clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
785 nfsd4_probe_callback(clp);
786 }
787 return new;
703} 788}
704 789
705/* caller must hold client_lock */ 790/* caller must hold client_lock */
@@ -731,21 +816,6 @@ unhash_session(struct nfsd4_session *ses)
731 list_del(&ses->se_perclnt); 816 list_del(&ses->se_perclnt);
732} 817}
733 818
734void
735free_session(struct kref *kref)
736{
737 struct nfsd4_session *ses;
738 int mem;
739
740 ses = container_of(kref, struct nfsd4_session, se_ref);
741 spin_lock(&nfsd_drc_lock);
742 mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
743 nfsd_drc_mem_used -= mem;
744 spin_unlock(&nfsd_drc_lock);
745 free_session_slots(ses);
746 kfree(ses);
747}
748
749/* must be called under the client_lock */ 819/* must be called under the client_lock */
750static inline void 820static inline void
751renew_client_locked(struct nfs4_client *clp) 821renew_client_locked(struct nfs4_client *clp)
@@ -812,6 +882,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
812static inline void 882static inline void
813free_client(struct nfs4_client *clp) 883free_client(struct nfs4_client *clp)
814{ 884{
885 while (!list_empty(&clp->cl_sessions)) {
886 struct nfsd4_session *ses;
887 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
888 se_perclnt);
889 list_del(&ses->se_perclnt);
890 nfsd4_put_session(ses);
891 }
815 if (clp->cl_cred.cr_group_info) 892 if (clp->cl_cred.cr_group_info)
816 put_group_info(clp->cl_cred.cr_group_info); 893 put_group_info(clp->cl_cred.cr_group_info);
817 kfree(clp->cl_principal); 894 kfree(clp->cl_principal);
@@ -838,15 +915,12 @@ release_session_client(struct nfsd4_session *session)
838static inline void 915static inline void
839unhash_client_locked(struct nfs4_client *clp) 916unhash_client_locked(struct nfs4_client *clp)
840{ 917{
918 struct nfsd4_session *ses;
919
841 mark_client_expired(clp); 920 mark_client_expired(clp);
842 list_del(&clp->cl_lru); 921 list_del(&clp->cl_lru);
843 while (!list_empty(&clp->cl_sessions)) { 922 list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
844 struct nfsd4_session *ses; 923 list_del_init(&ses->se_hash);
845 ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
846 se_perclnt);
847 unhash_session(ses);
848 nfsd4_put_session(ses);
849 }
850} 924}
851 925
852static void 926static void
@@ -875,7 +949,7 @@ expire_client(struct nfs4_client *clp)
875 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); 949 sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient);
876 release_openowner(sop); 950 release_openowner(sop);
877 } 951 }
878 nfsd4_set_callback_client(clp, NULL); 952 nfsd4_shutdown_callback(clp);
879 if (clp->cl_cb_conn.cb_xprt) 953 if (clp->cl_cb_conn.cb_xprt)
880 svc_xprt_put(clp->cl_cb_conn.cb_xprt); 954 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
881 list_del(&clp->cl_idhash); 955 list_del(&clp->cl_idhash);
@@ -960,6 +1034,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
960 if (clp == NULL) 1034 if (clp == NULL)
961 return NULL; 1035 return NULL;
962 1036
1037 INIT_LIST_HEAD(&clp->cl_sessions);
1038
963 princ = svc_gss_principal(rqstp); 1039 princ = svc_gss_principal(rqstp);
964 if (princ) { 1040 if (princ) {
965 clp->cl_principal = kstrdup(princ, GFP_KERNEL); 1041 clp->cl_principal = kstrdup(princ, GFP_KERNEL);
@@ -976,8 +1052,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
976 INIT_LIST_HEAD(&clp->cl_strhash); 1052 INIT_LIST_HEAD(&clp->cl_strhash);
977 INIT_LIST_HEAD(&clp->cl_openowners); 1053 INIT_LIST_HEAD(&clp->cl_openowners);
978 INIT_LIST_HEAD(&clp->cl_delegations); 1054 INIT_LIST_HEAD(&clp->cl_delegations);
979 INIT_LIST_HEAD(&clp->cl_sessions);
980 INIT_LIST_HEAD(&clp->cl_lru); 1055 INIT_LIST_HEAD(&clp->cl_lru);
1056 spin_lock_init(&clp->cl_lock);
1057 INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc);
981 clp->cl_time = get_seconds(); 1058 clp->cl_time = get_seconds();
982 clear_bit(0, &clp->cl_cb_slot_busy); 1059 clear_bit(0, &clp->cl_cb_slot_busy);
983 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 1060 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
@@ -986,7 +1063,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
986 clp->cl_flavor = rqstp->rq_flavor; 1063 clp->cl_flavor = rqstp->rq_flavor;
987 copy_cred(&clp->cl_cred, &rqstp->rq_cred); 1064 copy_cred(&clp->cl_cred, &rqstp->rq_cred);
988 gen_confirm(clp); 1065 gen_confirm(clp);
989 1066 clp->cl_cb_session = NULL;
990 return clp; 1067 return clp;
991} 1068}
992 1069
@@ -1098,7 +1175,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval,
1098static void 1175static void
1099gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) 1176gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
1100{ 1177{
1101 struct nfs4_cb_conn *cb = &clp->cl_cb_conn; 1178 struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
1102 unsigned short expected_family; 1179 unsigned short expected_family;
1103 1180
1104 /* Currently, we only support tcp and tcp6 for the callback channel */ 1181 /* Currently, we only support tcp and tcp6 for the callback channel */
@@ -1111,24 +1188,23 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid)
1111 else 1188 else
1112 goto out_err; 1189 goto out_err;
1113 1190
1114 cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, 1191 conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val,
1115 se->se_callback_addr_len, 1192 se->se_callback_addr_len,
1116 (struct sockaddr *) &cb->cb_addr, 1193 (struct sockaddr *)&conn->cb_addr,
1117 sizeof(cb->cb_addr)); 1194 sizeof(conn->cb_addr));
1118 1195
1119 if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) 1196 if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
1120 goto out_err; 1197 goto out_err;
1121 1198
1122 if (cb->cb_addr.ss_family == AF_INET6) 1199 if (conn->cb_addr.ss_family == AF_INET6)
1123 ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; 1200 ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;
1124 1201
1125 cb->cb_minorversion = 0; 1202 conn->cb_prog = se->se_callback_prog;
1126 cb->cb_prog = se->se_callback_prog; 1203 conn->cb_ident = se->se_callback_ident;
1127 cb->cb_ident = se->se_callback_ident;
1128 return; 1204 return;
1129out_err: 1205out_err:
1130 cb->cb_addr.ss_family = AF_UNSPEC; 1206 conn->cb_addr.ss_family = AF_UNSPEC;
1131 cb->cb_addrlen = 0; 1207 conn->cb_addrlen = 0;
1132 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " 1208 dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
1133 "will not receive delegations\n", 1209 "will not receive delegations\n",
1134 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); 1210 clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
@@ -1415,7 +1491,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1415{ 1491{
1416 struct sockaddr *sa = svc_addr(rqstp); 1492 struct sockaddr *sa = svc_addr(rqstp);
1417 struct nfs4_client *conf, *unconf; 1493 struct nfs4_client *conf, *unconf;
1494 struct nfsd4_session *new;
1418 struct nfsd4_clid_slot *cs_slot = NULL; 1495 struct nfsd4_clid_slot *cs_slot = NULL;
1496 bool confirm_me = false;
1419 int status = 0; 1497 int status = 0;
1420 1498
1421 nfs4_lock_state(); 1499 nfs4_lock_state();
@@ -1438,7 +1516,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1438 cs_slot->sl_seqid, cr_ses->seqid); 1516 cs_slot->sl_seqid, cr_ses->seqid);
1439 goto out; 1517 goto out;
1440 } 1518 }
1441 cs_slot->sl_seqid++;
1442 } else if (unconf) { 1519 } else if (unconf) {
1443 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || 1520 if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
1444 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { 1521 !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
@@ -1451,25 +1528,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1451 if (status) { 1528 if (status) {
1452 /* an unconfirmed replay returns misordered */ 1529 /* an unconfirmed replay returns misordered */
1453 status = nfserr_seq_misordered; 1530 status = nfserr_seq_misordered;
1454 goto out_cache; 1531 goto out;
1455 } 1532 }
1456 1533
1457 cs_slot->sl_seqid++; /* from 0 to 1 */ 1534 confirm_me = true;
1458 move_to_confirmed(unconf);
1459
1460 if (cr_ses->flags & SESSION4_BACK_CHAN) {
1461 unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt;
1462 svc_xprt_get(rqstp->rq_xprt);
1463 rpc_copy_addr(
1464 (struct sockaddr *)&unconf->cl_cb_conn.cb_addr,
1465 sa);
1466 unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
1467 unconf->cl_cb_conn.cb_minorversion =
1468 cstate->minorversion;
1469 unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog;
1470 unconf->cl_cb_seq_nr = 1;
1471 nfsd4_probe_callback(unconf, &unconf->cl_cb_conn);
1472 }
1473 conf = unconf; 1535 conf = unconf;
1474 } else { 1536 } else {
1475 status = nfserr_stale_clientid; 1537 status = nfserr_stale_clientid;
@@ -1477,22 +1539,30 @@ nfsd4_create_session(struct svc_rqst *rqstp,
1477 } 1539 }
1478 1540
1479 /* 1541 /*
1542 * XXX: we should probably set this at creation time, and check
1543 * for consistent minorversion use throughout:
1544 */
1545 conf->cl_minorversion = 1;
1546 /*
1480 * We do not support RDMA or persistent sessions 1547 * We do not support RDMA or persistent sessions
1481 */ 1548 */
1482 cr_ses->flags &= ~SESSION4_PERSIST; 1549 cr_ses->flags &= ~SESSION4_PERSIST;
1483 cr_ses->flags &= ~SESSION4_RDMA; 1550 cr_ses->flags &= ~SESSION4_RDMA;
1484 1551
1485 status = alloc_init_session(rqstp, conf, cr_ses); 1552 status = nfserr_jukebox;
1486 if (status) 1553 new = alloc_init_session(rqstp, conf, cr_ses);
1554 if (!new)
1487 goto out; 1555 goto out;
1488 1556 status = nfs_ok;
1489 memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, 1557 memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
1490 NFS4_MAX_SESSIONID_LEN); 1558 NFS4_MAX_SESSIONID_LEN);
1559 cs_slot->sl_seqid++;
1491 cr_ses->seqid = cs_slot->sl_seqid; 1560 cr_ses->seqid = cs_slot->sl_seqid;
1492 1561
1493out_cache:
1494 /* cache solo and embedded create sessions under the state lock */ 1562 /* cache solo and embedded create sessions under the state lock */
1495 nfsd4_cache_create_session(cr_ses, cs_slot, status); 1563 nfsd4_cache_create_session(cr_ses, cs_slot, status);
1564 if (confirm_me)
1565 move_to_confirmed(conf);
1496out: 1566out:
1497 nfs4_unlock_state(); 1567 nfs4_unlock_state();
1498 dprintk("%s returns %d\n", __func__, ntohl(status)); 1568 dprintk("%s returns %d\n", __func__, ntohl(status));
@@ -1546,8 +1616,11 @@ nfsd4_destroy_session(struct svc_rqst *r,
1546 1616
1547 nfs4_lock_state(); 1617 nfs4_lock_state();
1548 /* wait for callbacks */ 1618 /* wait for callbacks */
1549 nfsd4_set_callback_client(ses->se_client, NULL); 1619 nfsd4_shutdown_callback(ses->se_client);
1550 nfs4_unlock_state(); 1620 nfs4_unlock_state();
1621
1622 nfsd4_del_conns(ses);
1623
1551 nfsd4_put_session(ses); 1624 nfsd4_put_session(ses);
1552 status = nfs_ok; 1625 status = nfs_ok;
1553out: 1626out:
@@ -1555,6 +1628,36 @@ out:
1555 return status; 1628 return status;
1556} 1629}
1557 1630
1631static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
1632{
1633 struct nfsd4_conn *c;
1634
1635 list_for_each_entry(c, &s->se_conns, cn_persession) {
1636 if (c->cn_xprt == xpt) {
1637 return c;
1638 }
1639 }
1640 return NULL;
1641}
1642
1643static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
1644{
1645 struct nfs4_client *clp = ses->se_client;
1646 struct nfsd4_conn *c;
1647
1648 spin_lock(&clp->cl_lock);
1649 c = __nfsd4_find_conn(new->cn_xprt, ses);
1650 if (c) {
1651 spin_unlock(&clp->cl_lock);
1652 free_conn(new);
1653 return;
1654 }
1655 __nfsd4_hash_conn(new, ses);
1656 spin_unlock(&clp->cl_lock);
1657 nfsd4_register_conn(new);
1658 return;
1659}
1660
1558__be32 1661__be32
1559nfsd4_sequence(struct svc_rqst *rqstp, 1662nfsd4_sequence(struct svc_rqst *rqstp,
1560 struct nfsd4_compound_state *cstate, 1663 struct nfsd4_compound_state *cstate,
@@ -1563,11 +1666,20 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1563 struct nfsd4_compoundres *resp = rqstp->rq_resp; 1666 struct nfsd4_compoundres *resp = rqstp->rq_resp;
1564 struct nfsd4_session *session; 1667 struct nfsd4_session *session;
1565 struct nfsd4_slot *slot; 1668 struct nfsd4_slot *slot;
1669 struct nfsd4_conn *conn;
1566 int status; 1670 int status;
1567 1671
1568 if (resp->opcnt != 1) 1672 if (resp->opcnt != 1)
1569 return nfserr_sequence_pos; 1673 return nfserr_sequence_pos;
1570 1674
1675 /*
1676 * Will be either used or freed by nfsd4_sequence_check_conn
1677 * below.
1678 */
1679 conn = alloc_conn(rqstp, NFS4_CDFC4_FORE);
1680 if (!conn)
1681 return nfserr_jukebox;
1682
1571 spin_lock(&client_lock); 1683 spin_lock(&client_lock);
1572 status = nfserr_badsession; 1684 status = nfserr_badsession;
1573 session = find_in_sessionid_hashtbl(&seq->sessionid); 1685 session = find_in_sessionid_hashtbl(&seq->sessionid);
@@ -1599,6 +1711,9 @@ nfsd4_sequence(struct svc_rqst *rqstp,
1599 if (status) 1711 if (status)
1600 goto out; 1712 goto out;
1601 1713
1714 nfsd4_sequence_check_conn(conn, session);
1715 conn = NULL;
1716
1602 /* Success! bump slot seqid */ 1717 /* Success! bump slot seqid */
1603 slot->sl_inuse = true; 1718 slot->sl_inuse = true;
1604 slot->sl_seqid = seq->seqid; 1719 slot->sl_seqid = seq->seqid;
@@ -1613,6 +1728,7 @@ out:
1613 nfsd4_get_session(cstate->session); 1728 nfsd4_get_session(cstate->session);
1614 atomic_inc(&session->se_client->cl_refcount); 1729 atomic_inc(&session->se_client->cl_refcount);
1615 } 1730 }
1731 kfree(conn);
1616 spin_unlock(&client_lock); 1732 spin_unlock(&client_lock);
1617 dprintk("%s: return %d\n", __func__, ntohl(status)); 1733 dprintk("%s: return %d\n", __func__, ntohl(status));
1618 return status; 1734 return status;
@@ -1747,6 +1863,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1747 goto out; 1863 goto out;
1748 gen_clid(new); 1864 gen_clid(new);
1749 } 1865 }
1866 /*
1867 * XXX: we should probably set this at creation time, and check
1868 * for consistent minorversion use throughout:
1869 */
1870 new->cl_minorversion = 0;
1750 gen_callback(new, setclid, rpc_get_scope_id(sa)); 1871 gen_callback(new, setclid, rpc_get_scope_id(sa));
1751 add_to_unconfirmed(new, strhashval); 1872 add_to_unconfirmed(new, strhashval);
1752 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; 1873 setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
@@ -1807,7 +1928,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1807 status = nfserr_clid_inuse; 1928 status = nfserr_clid_inuse;
1808 else { 1929 else {
1809 atomic_set(&conf->cl_cb_set, 0); 1930 atomic_set(&conf->cl_cb_set, 0);
1810 nfsd4_probe_callback(conf, &unconf->cl_cb_conn); 1931 nfsd4_change_callback(conf, &unconf->cl_cb_conn);
1932 nfsd4_probe_callback(conf);
1811 expire_client(unconf); 1933 expire_client(unconf);
1812 status = nfs_ok; 1934 status = nfs_ok;
1813 1935
@@ -1841,7 +1963,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
1841 } 1963 }
1842 move_to_confirmed(unconf); 1964 move_to_confirmed(unconf);
1843 conf = unconf; 1965 conf = unconf;
1844 nfsd4_probe_callback(conf, &conf->cl_cb_conn); 1966 nfsd4_probe_callback(conf);
1845 status = nfs_ok; 1967 status = nfs_ok;
1846 } 1968 }
1847 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) 1969 } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm)))
@@ -2944,7 +3066,11 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2944 if (STALE_STATEID(stateid)) 3066 if (STALE_STATEID(stateid))
2945 goto out; 3067 goto out;
2946 3068
2947 status = nfserr_bad_stateid; 3069 /*
3070 * We assume that any stateid that has the current boot time,
3071 * but that we can't find, is expired:
3072 */
3073 status = nfserr_expired;
2948 if (is_delegation_stateid(stateid)) { 3074 if (is_delegation_stateid(stateid)) {
2949 dp = find_delegation_stateid(ino, stateid); 3075 dp = find_delegation_stateid(ino, stateid);
2950 if (!dp) 3076 if (!dp)
@@ -2964,6 +3090,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
2964 stp = find_stateid(stateid, flags); 3090 stp = find_stateid(stateid, flags);
2965 if (!stp) 3091 if (!stp)
2966 goto out; 3092 goto out;
3093 status = nfserr_bad_stateid;
2967 if (nfs4_check_fh(current_fh, stp)) 3094 if (nfs4_check_fh(current_fh, stp))
2968 goto out; 3095 goto out;
2969 if (!stp->st_stateowner->so_confirmed) 3096 if (!stp->st_stateowner->so_confirmed)
@@ -3038,8 +3165,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
3038 * a replayed close: 3165 * a replayed close:
3039 */ 3166 */
3040 sop = search_close_lru(stateid->si_stateownerid, flags); 3167 sop = search_close_lru(stateid->si_stateownerid, flags);
3168 /* It's not stale; let's assume it's expired: */
3041 if (sop == NULL) 3169 if (sop == NULL)
3042 return nfserr_bad_stateid; 3170 return nfserr_expired;
3043 *sopp = sop; 3171 *sopp = sop;
3044 goto check_replay; 3172 goto check_replay;
3045 } 3173 }
@@ -3304,6 +3432,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
3304 status = nfserr_bad_stateid; 3432 status = nfserr_bad_stateid;
3305 if (!is_delegation_stateid(stateid)) 3433 if (!is_delegation_stateid(stateid))
3306 goto out; 3434 goto out;
3435 status = nfserr_expired;
3307 dp = find_delegation_stateid(inode, stateid); 3436 dp = find_delegation_stateid(inode, stateid);
3308 if (!dp) 3437 if (!dp)
3309 goto out; 3438 goto out;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1a468bbd330f..f35a94a04026 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1805,19 +1805,23 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1805 goto out_nfserr; 1805 goto out_nfserr;
1806 } 1806 }
1807 } 1807 }
1808 if ((buflen -= 16) < 0)
1809 goto out_resource;
1810 1808
1811 if (unlikely(bmval2)) { 1809 if (bmval2) {
1810 if ((buflen -= 16) < 0)
1811 goto out_resource;
1812 WRITE32(3); 1812 WRITE32(3);
1813 WRITE32(bmval0); 1813 WRITE32(bmval0);
1814 WRITE32(bmval1); 1814 WRITE32(bmval1);
1815 WRITE32(bmval2); 1815 WRITE32(bmval2);
1816 } else if (likely(bmval1)) { 1816 } else if (bmval1) {
1817 if ((buflen -= 12) < 0)
1818 goto out_resource;
1817 WRITE32(2); 1819 WRITE32(2);
1818 WRITE32(bmval0); 1820 WRITE32(bmval0);
1819 WRITE32(bmval1); 1821 WRITE32(bmval1);
1820 } else { 1822 } else {
1823 if ((buflen -= 8) < 0)
1824 goto out_resource;
1821 WRITE32(1); 1825 WRITE32(1);
1822 WRITE32(bmval0); 1826 WRITE32(bmval0);
1823 } 1827 }
@@ -1828,15 +1832,17 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
1828 u32 word1 = nfsd_suppattrs1(minorversion); 1832 u32 word1 = nfsd_suppattrs1(minorversion);
1829 u32 word2 = nfsd_suppattrs2(minorversion); 1833 u32 word2 = nfsd_suppattrs2(minorversion);
1830 1834
1831 if ((buflen -= 12) < 0)
1832 goto out_resource;
1833 if (!aclsupport) 1835 if (!aclsupport)
1834 word0 &= ~FATTR4_WORD0_ACL; 1836 word0 &= ~FATTR4_WORD0_ACL;
1835 if (!word2) { 1837 if (!word2) {
1838 if ((buflen -= 12) < 0)
1839 goto out_resource;
1836 WRITE32(2); 1840 WRITE32(2);
1837 WRITE32(word0); 1841 WRITE32(word0);
1838 WRITE32(word1); 1842 WRITE32(word1);
1839 } else { 1843 } else {
1844 if ((buflen -= 16) < 0)
1845 goto out_resource;
1840 WRITE32(3); 1846 WRITE32(3);
1841 WRITE32(word0); 1847 WRITE32(word0);
1842 WRITE32(word1); 1848 WRITE32(word1);
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 06fa87e52e82..d6dc3f61f8ba 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -22,6 +22,7 @@
22 */ 22 */
23enum { 23enum {
24 NFSD_Root = 1, 24 NFSD_Root = 1,
25#ifdef CONFIG_NFSD_DEPRECATED
25 NFSD_Svc, 26 NFSD_Svc,
26 NFSD_Add, 27 NFSD_Add,
27 NFSD_Del, 28 NFSD_Del,
@@ -29,6 +30,7 @@ enum {
29 NFSD_Unexport, 30 NFSD_Unexport,
30 NFSD_Getfd, 31 NFSD_Getfd,
31 NFSD_Getfs, 32 NFSD_Getfs,
33#endif
32 NFSD_List, 34 NFSD_List,
33 NFSD_Export_features, 35 NFSD_Export_features,
34 NFSD_Fh, 36 NFSD_Fh,
@@ -54,6 +56,7 @@ enum {
54/* 56/*
55 * write() for these nodes. 57 * write() for these nodes.
56 */ 58 */
59#ifdef CONFIG_NFSD_DEPRECATED
57static ssize_t write_svc(struct file *file, char *buf, size_t size); 60static ssize_t write_svc(struct file *file, char *buf, size_t size);
58static ssize_t write_add(struct file *file, char *buf, size_t size); 61static ssize_t write_add(struct file *file, char *buf, size_t size);
59static ssize_t write_del(struct file *file, char *buf, size_t size); 62static ssize_t write_del(struct file *file, char *buf, size_t size);
@@ -61,6 +64,7 @@ static ssize_t write_export(struct file *file, char *buf, size_t size);
61static ssize_t write_unexport(struct file *file, char *buf, size_t size); 64static ssize_t write_unexport(struct file *file, char *buf, size_t size);
62static ssize_t write_getfd(struct file *file, char *buf, size_t size); 65static ssize_t write_getfd(struct file *file, char *buf, size_t size);
63static ssize_t write_getfs(struct file *file, char *buf, size_t size); 66static ssize_t write_getfs(struct file *file, char *buf, size_t size);
67#endif
64static ssize_t write_filehandle(struct file *file, char *buf, size_t size); 68static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
65static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); 69static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size);
66static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); 70static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size);
@@ -76,6 +80,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
76#endif 80#endif
77 81
78static ssize_t (*write_op[])(struct file *, char *, size_t) = { 82static ssize_t (*write_op[])(struct file *, char *, size_t) = {
83#ifdef CONFIG_NFSD_DEPRECATED
79 [NFSD_Svc] = write_svc, 84 [NFSD_Svc] = write_svc,
80 [NFSD_Add] = write_add, 85 [NFSD_Add] = write_add,
81 [NFSD_Del] = write_del, 86 [NFSD_Del] = write_del,
@@ -83,6 +88,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
83 [NFSD_Unexport] = write_unexport, 88 [NFSD_Unexport] = write_unexport,
84 [NFSD_Getfd] = write_getfd, 89 [NFSD_Getfd] = write_getfd,
85 [NFSD_Getfs] = write_getfs, 90 [NFSD_Getfs] = write_getfs,
91#endif
86 [NFSD_Fh] = write_filehandle, 92 [NFSD_Fh] = write_filehandle,
87 [NFSD_FO_UnlockIP] = write_unlock_ip, 93 [NFSD_FO_UnlockIP] = write_unlock_ip,
88 [NFSD_FO_UnlockFS] = write_unlock_fs, 94 [NFSD_FO_UnlockFS] = write_unlock_fs,
@@ -121,6 +127,14 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
121 127
122static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) 128static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
123{ 129{
130 static int warned;
131 if (file->f_dentry->d_name.name[0] == '.' && !warned) {
132 printk(KERN_INFO
133 "Warning: \"%s\" uses deprecated NFSD interface: %s."
134 " This will be removed in 2.6.40\n",
135 current->comm, file->f_dentry->d_name.name);
136 warned = 1;
137 }
124 if (! file->private_data) { 138 if (! file->private_data) {
125 /* An attempt to read a transaction file without writing 139 /* An attempt to read a transaction file without writing
126 * causes a 0-byte write so that the file can return 140 * causes a 0-byte write so that the file can return
@@ -187,6 +201,7 @@ static const struct file_operations pool_stats_operations = {
187 * payload - write methods 201 * payload - write methods
188 */ 202 */
189 203
204#ifdef CONFIG_NFSD_DEPRECATED
190/** 205/**
191 * write_svc - Start kernel's NFSD server 206 * write_svc - Start kernel's NFSD server
192 * 207 *
@@ -402,7 +417,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size)
402 417
403 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); 418 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
404 419
405 clp = auth_unix_lookup(&in6); 420 clp = auth_unix_lookup(&init_net, &in6);
406 if (!clp) 421 if (!clp)
407 err = -EPERM; 422 err = -EPERM;
408 else { 423 else {
@@ -465,7 +480,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
465 480
466 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); 481 ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6);
467 482
468 clp = auth_unix_lookup(&in6); 483 clp = auth_unix_lookup(&init_net, &in6);
469 if (!clp) 484 if (!clp)
470 err = -EPERM; 485 err = -EPERM;
471 else { 486 else {
@@ -482,6 +497,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
482 out: 497 out:
483 return err; 498 return err;
484} 499}
500#endif /* CONFIG_NFSD_DEPRECATED */
485 501
486/** 502/**
487 * write_unlock_ip - Release all locks used by a client 503 * write_unlock_ip - Release all locks used by a client
@@ -1000,12 +1016,12 @@ static ssize_t __write_ports_addxprt(char *buf)
1000 if (err != 0) 1016 if (err != 0)
1001 return err; 1017 return err;
1002 1018
1003 err = svc_create_xprt(nfsd_serv, transport, 1019 err = svc_create_xprt(nfsd_serv, transport, &init_net,
1004 PF_INET, port, SVC_SOCK_ANONYMOUS); 1020 PF_INET, port, SVC_SOCK_ANONYMOUS);
1005 if (err < 0) 1021 if (err < 0)
1006 goto out_err; 1022 goto out_err;
1007 1023
1008 err = svc_create_xprt(nfsd_serv, transport, 1024 err = svc_create_xprt(nfsd_serv, transport, &init_net,
1009 PF_INET6, port, SVC_SOCK_ANONYMOUS); 1025 PF_INET6, port, SVC_SOCK_ANONYMOUS);
1010 if (err < 0 && err != -EAFNOSUPPORT) 1026 if (err < 0 && err != -EAFNOSUPPORT)
1011 goto out_close; 1027 goto out_close;
@@ -1356,6 +1372,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
1356static int nfsd_fill_super(struct super_block * sb, void * data, int silent) 1372static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1357{ 1373{
1358 static struct tree_descr nfsd_files[] = { 1374 static struct tree_descr nfsd_files[] = {
1375#ifdef CONFIG_NFSD_DEPRECATED
1359 [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, 1376 [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR},
1360 [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, 1377 [NFSD_Add] = {".add", &transaction_ops, S_IWUSR},
1361 [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, 1378 [NFSD_Del] = {".del", &transaction_ops, S_IWUSR},
@@ -1363,6 +1380,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
1363 [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, 1380 [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR},
1364 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, 1381 [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
1365 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, 1382 [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
1383#endif
1366 [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, 1384 [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
1367 [NFSD_Export_features] = {"export_features", 1385 [NFSD_Export_features] = {"export_features",
1368 &export_features_operations, S_IRUGO}, 1386 &export_features_operations, S_IRUGO},
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index b76ac3a82e39..6b641cf2c19a 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -249,7 +249,7 @@ extern time_t nfsd4_grace;
249#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ 249#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
250#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ 250#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
251 251
252#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ 252#define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */
253 253
254/* 254/*
255 * The following attributes are currently not supported by the NFSv4 server: 255 * The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index e2c43464f237..2bae1d86f5f2 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -16,6 +16,7 @@
16#include <linux/lockd/bind.h> 16#include <linux/lockd/bind.h>
17#include <linux/nfsacl.h> 17#include <linux/nfsacl.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <net/net_namespace.h>
19#include "nfsd.h" 20#include "nfsd.h"
20#include "cache.h" 21#include "cache.h"
21#include "vfs.h" 22#include "vfs.h"
@@ -186,12 +187,12 @@ static int nfsd_init_socks(int port)
186 if (!list_empty(&nfsd_serv->sv_permsocks)) 187 if (!list_empty(&nfsd_serv->sv_permsocks))
187 return 0; 188 return 0;
188 189
189 error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, 190 error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port,
190 SVC_SOCK_DEFAULTS); 191 SVC_SOCK_DEFAULTS);
191 if (error < 0) 192 if (error < 0)
192 return error; 193 return error;
193 194
194 error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, 195 error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port,
195 SVC_SOCK_DEFAULTS); 196 SVC_SOCK_DEFAULTS);
196 if (error < 0) 197 if (error < 0)
197 return error; 198 return error;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 322518c88e4b..39adc27b0685 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -35,6 +35,7 @@
35#ifndef _NFSD4_STATE_H 35#ifndef _NFSD4_STATE_H
36#define _NFSD4_STATE_H 36#define _NFSD4_STATE_H
37 37
38#include <linux/sunrpc/svc_xprt.h>
38#include <linux/nfsd/nfsfh.h> 39#include <linux/nfsd/nfsfh.h>
39#include "nfsfh.h" 40#include "nfsfh.h"
40 41
@@ -64,19 +65,12 @@ typedef struct {
64 (s)->si_fileid, \ 65 (s)->si_fileid, \
65 (s)->si_generation 66 (s)->si_generation
66 67
67struct nfsd4_cb_sequence {
68 /* args/res */
69 u32 cbs_minorversion;
70 struct nfs4_client *cbs_clp;
71};
72
73struct nfs4_rpc_args {
74 void *args_op;
75 struct nfsd4_cb_sequence args_seq;
76};
77
78struct nfsd4_callback { 68struct nfsd4_callback {
79 struct nfs4_rpc_args cb_args; 69 void *cb_op;
70 struct nfs4_client *cb_clp;
71 u32 cb_minorversion;
72 struct rpc_message cb_msg;
73 const struct rpc_call_ops *cb_ops;
80 struct work_struct cb_work; 74 struct work_struct cb_work;
81}; 75};
82 76
@@ -91,7 +85,6 @@ struct nfs4_delegation {
91 u32 dl_type; 85 u32 dl_type;
92 time_t dl_time; 86 time_t dl_time;
93/* For recall: */ 87/* For recall: */
94 u32 dl_ident;
95 stateid_t dl_stateid; 88 stateid_t dl_stateid;
96 struct knfsd_fh dl_fh; 89 struct knfsd_fh dl_fh;
97 int dl_retries; 90 int dl_retries;
@@ -103,8 +96,8 @@ struct nfs4_cb_conn {
103 /* SETCLIENTID info */ 96 /* SETCLIENTID info */
104 struct sockaddr_storage cb_addr; 97 struct sockaddr_storage cb_addr;
105 size_t cb_addrlen; 98 size_t cb_addrlen;
106 u32 cb_prog; 99 u32 cb_prog; /* used only in 4.0 case;
107 u32 cb_minorversion; 100 per-session otherwise */
108 u32 cb_ident; /* minorversion 0 only */ 101 u32 cb_ident; /* minorversion 0 only */
109 struct svc_xprt *cb_xprt; /* minorversion 1 only */ 102 struct svc_xprt *cb_xprt; /* minorversion 1 only */
110}; 103};
@@ -160,6 +153,15 @@ struct nfsd4_clid_slot {
160 struct nfsd4_create_session sl_cr_ses; 153 struct nfsd4_create_session sl_cr_ses;
161}; 154};
162 155
156struct nfsd4_conn {
157 struct list_head cn_persession;
158 struct svc_xprt *cn_xprt;
159 struct svc_xpt_user cn_xpt_user;
160 struct nfsd4_session *cn_session;
161/* CDFC4_FORE, CDFC4_BACK: */
162 unsigned char cn_flags;
163};
164
163struct nfsd4_session { 165struct nfsd4_session {
164 struct kref se_ref; 166 struct kref se_ref;
165 struct list_head se_hash; /* hash by sessionid */ 167 struct list_head se_hash; /* hash by sessionid */
@@ -169,6 +171,9 @@ struct nfsd4_session {
169 struct nfs4_sessionid se_sessionid; 171 struct nfs4_sessionid se_sessionid;
170 struct nfsd4_channel_attrs se_fchannel; 172 struct nfsd4_channel_attrs se_fchannel;
171 struct nfsd4_channel_attrs se_bchannel; 173 struct nfsd4_channel_attrs se_bchannel;
174 struct list_head se_conns;
175 u32 se_cb_prog;
176 u32 se_cb_seq_nr;
172 struct nfsd4_slot *se_slots[]; /* forward channel slots */ 177 struct nfsd4_slot *se_slots[]; /* forward channel slots */
173}; 178};
174 179
@@ -221,24 +226,32 @@ struct nfs4_client {
221 clientid_t cl_clientid; /* generated by server */ 226 clientid_t cl_clientid; /* generated by server */
222 nfs4_verifier cl_confirm; /* generated by server */ 227 nfs4_verifier cl_confirm; /* generated by server */
223 u32 cl_firststate; /* recovery dir creation */ 228 u32 cl_firststate; /* recovery dir creation */
229 u32 cl_minorversion;
224 230
225 /* for v4.0 and v4.1 callbacks: */ 231 /* for v4.0 and v4.1 callbacks: */
226 struct nfs4_cb_conn cl_cb_conn; 232 struct nfs4_cb_conn cl_cb_conn;
233#define NFSD4_CLIENT_CB_UPDATE 1
234#define NFSD4_CLIENT_KILL 2
235 unsigned long cl_cb_flags;
227 struct rpc_clnt *cl_cb_client; 236 struct rpc_clnt *cl_cb_client;
237 u32 cl_cb_ident;
228 atomic_t cl_cb_set; 238 atomic_t cl_cb_set;
239 struct nfsd4_callback cl_cb_null;
240 struct nfsd4_session *cl_cb_session;
241
242 /* for all client information that callback code might need: */
243 spinlock_t cl_lock;
229 244
230 /* for nfs41 */ 245 /* for nfs41 */
231 struct list_head cl_sessions; 246 struct list_head cl_sessions;
232 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ 247 struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
233 u32 cl_exchange_flags; 248 u32 cl_exchange_flags;
234 struct nfs4_sessionid cl_sessionid;
235 /* number of rpc's in progress over an associated session: */ 249 /* number of rpc's in progress over an associated session: */
236 atomic_t cl_refcount; 250 atomic_t cl_refcount;
237 251
238 /* for nfs41 callbacks */ 252 /* for nfs41 callbacks */
239 /* We currently support a single back channel with a single slot */ 253 /* We currently support a single back channel with a single slot */
240 unsigned long cl_cb_slot_busy; 254 unsigned long cl_cb_slot_busy;
241 u32 cl_cb_seq_nr;
242 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ 255 struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
243 /* wait here for slots */ 256 /* wait here for slots */
244}; 257};
@@ -440,12 +453,13 @@ extern int nfs4_in_grace(void);
440extern __be32 nfs4_check_open_reclaim(clientid_t *clid); 453extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
441extern void nfs4_free_stateowner(struct kref *kref); 454extern void nfs4_free_stateowner(struct kref *kref);
442extern int set_callback_cred(void); 455extern int set_callback_cred(void);
443extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); 456extern void nfsd4_probe_callback(struct nfs4_client *clp);
457extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
444extern void nfsd4_do_callback_rpc(struct work_struct *); 458extern void nfsd4_do_callback_rpc(struct work_struct *);
445extern void nfsd4_cb_recall(struct nfs4_delegation *dp); 459extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
446extern int nfsd4_create_callback_queue(void); 460extern int nfsd4_create_callback_queue(void);
447extern void nfsd4_destroy_callback_queue(void); 461extern void nfsd4_destroy_callback_queue(void);
448extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *); 462extern void nfsd4_shutdown_callback(struct nfs4_client *);
449extern void nfs4_put_delegation(struct nfs4_delegation *dp); 463extern void nfs4_put_delegation(struct nfs4_delegation *dp);
450extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); 464extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
451extern void nfsd4_init_recdir(char *recdir_name); 465extern void nfsd4_init_recdir(char *recdir_name);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index d926af626177..687d090cea34 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1609,7 +1609,7 @@ nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
1609 kunmap_atomic(kaddr, KM_USER0); 1609 kunmap_atomic(kaddr, KM_USER0);
1610 1610
1611 if (!TestSetPageWriteback(clone_page)) 1611 if (!TestSetPageWriteback(clone_page))
1612 inc_zone_page_state(clone_page, NR_WRITEBACK); 1612 account_page_writeback(clone_page);
1613 unlock_page(clone_page); 1613 unlock_page(clone_page);
1614 1614
1615 return 0; 1615 return 0;
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 50f8f0600f06..6a0068841d96 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -33,8 +33,8 @@ config PROC_KCORE
33 depends on PROC_FS && MMU 33 depends on PROC_FS && MMU
34 34
35config PROC_VMCORE 35config PROC_VMCORE
36 bool "/proc/vmcore support (EXPERIMENTAL)" 36 bool "/proc/vmcore support"
37 depends on PROC_FS && CRASH_DUMP 37 depends on PROC_FS && CRASH_DUMP
38 default y 38 default y
39 help 39 help
40 Exports the dump image of crashed kernel in ELF format. 40 Exports the dump image of crashed kernel in ELF format.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9883f1e18332..9b094c1c8465 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1025,28 +1025,47 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1025 memset(buffer, 0, sizeof(buffer)); 1025 memset(buffer, 0, sizeof(buffer));
1026 if (count > sizeof(buffer) - 1) 1026 if (count > sizeof(buffer) - 1)
1027 count = sizeof(buffer) - 1; 1027 count = sizeof(buffer) - 1;
1028 if (copy_from_user(buffer, buf, count)) 1028 if (copy_from_user(buffer, buf, count)) {
1029 return -EFAULT; 1029 err = -EFAULT;
1030 goto out;
1031 }
1030 1032
1031 err = strict_strtol(strstrip(buffer), 0, &oom_adjust); 1033 err = strict_strtol(strstrip(buffer), 0, &oom_adjust);
1032 if (err) 1034 if (err)
1033 return -EINVAL; 1035 goto out;
1034 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && 1036 if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
1035 oom_adjust != OOM_DISABLE) 1037 oom_adjust != OOM_DISABLE) {
1036 return -EINVAL; 1038 err = -EINVAL;
1039 goto out;
1040 }
1037 1041
1038 task = get_proc_task(file->f_path.dentry->d_inode); 1042 task = get_proc_task(file->f_path.dentry->d_inode);
1039 if (!task) 1043 if (!task) {
1040 return -ESRCH; 1044 err = -ESRCH;
1045 goto out;
1046 }
1047
1048 task_lock(task);
1049 if (!task->mm) {
1050 err = -EINVAL;
1051 goto err_task_lock;
1052 }
1053
1041 if (!lock_task_sighand(task, &flags)) { 1054 if (!lock_task_sighand(task, &flags)) {
1042 put_task_struct(task); 1055 err = -ESRCH;
1043 return -ESRCH; 1056 goto err_task_lock;
1044 } 1057 }
1045 1058
1046 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { 1059 if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
1047 unlock_task_sighand(task, &flags); 1060 err = -EACCES;
1048 put_task_struct(task); 1061 goto err_sighand;
1049 return -EACCES; 1062 }
1063
1064 if (oom_adjust != task->signal->oom_adj) {
1065 if (oom_adjust == OOM_DISABLE)
1066 atomic_inc(&task->mm->oom_disable_count);
1067 if (task->signal->oom_adj == OOM_DISABLE)
1068 atomic_dec(&task->mm->oom_disable_count);
1050 } 1069 }
1051 1070
1052 /* 1071 /*
@@ -1067,10 +1086,13 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
1067 else 1086 else
1068 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / 1087 task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
1069 -OOM_DISABLE; 1088 -OOM_DISABLE;
1089err_sighand:
1070 unlock_task_sighand(task, &flags); 1090 unlock_task_sighand(task, &flags);
1091err_task_lock:
1092 task_unlock(task);
1071 put_task_struct(task); 1093 put_task_struct(task);
1072 1094out:
1073 return count; 1095 return err < 0 ? err : count;
1074} 1096}
1075 1097
1076static const struct file_operations proc_oom_adjust_operations = { 1098static const struct file_operations proc_oom_adjust_operations = {
@@ -1111,30 +1133,49 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1111 memset(buffer, 0, sizeof(buffer)); 1133 memset(buffer, 0, sizeof(buffer));
1112 if (count > sizeof(buffer) - 1) 1134 if (count > sizeof(buffer) - 1)
1113 count = sizeof(buffer) - 1; 1135 count = sizeof(buffer) - 1;
1114 if (copy_from_user(buffer, buf, count)) 1136 if (copy_from_user(buffer, buf, count)) {
1115 return -EFAULT; 1137 err = -EFAULT;
1138 goto out;
1139 }
1116 1140
1117 err = strict_strtol(strstrip(buffer), 0, &oom_score_adj); 1141 err = strict_strtol(strstrip(buffer), 0, &oom_score_adj);
1118 if (err) 1142 if (err)
1119 return -EINVAL; 1143 goto out;
1120 if (oom_score_adj < OOM_SCORE_ADJ_MIN || 1144 if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
1121 oom_score_adj > OOM_SCORE_ADJ_MAX) 1145 oom_score_adj > OOM_SCORE_ADJ_MAX) {
1122 return -EINVAL; 1146 err = -EINVAL;
1147 goto out;
1148 }
1123 1149
1124 task = get_proc_task(file->f_path.dentry->d_inode); 1150 task = get_proc_task(file->f_path.dentry->d_inode);
1125 if (!task) 1151 if (!task) {
1126 return -ESRCH; 1152 err = -ESRCH;
1153 goto out;
1154 }
1155
1156 task_lock(task);
1157 if (!task->mm) {
1158 err = -EINVAL;
1159 goto err_task_lock;
1160 }
1161
1127 if (!lock_task_sighand(task, &flags)) { 1162 if (!lock_task_sighand(task, &flags)) {
1128 put_task_struct(task); 1163 err = -ESRCH;
1129 return -ESRCH; 1164 goto err_task_lock;
1130 } 1165 }
1166
1131 if (oom_score_adj < task->signal->oom_score_adj && 1167 if (oom_score_adj < task->signal->oom_score_adj &&
1132 !capable(CAP_SYS_RESOURCE)) { 1168 !capable(CAP_SYS_RESOURCE)) {
1133 unlock_task_sighand(task, &flags); 1169 err = -EACCES;
1134 put_task_struct(task); 1170 goto err_sighand;
1135 return -EACCES;
1136 } 1171 }
1137 1172
1173 if (oom_score_adj != task->signal->oom_score_adj) {
1174 if (oom_score_adj == OOM_SCORE_ADJ_MIN)
1175 atomic_inc(&task->mm->oom_disable_count);
1176 if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
1177 atomic_dec(&task->mm->oom_disable_count);
1178 }
1138 task->signal->oom_score_adj = oom_score_adj; 1179 task->signal->oom_score_adj = oom_score_adj;
1139 /* 1180 /*
1140 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is 1181 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
@@ -1145,9 +1186,13 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
1145 else 1186 else
1146 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / 1187 task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
1147 OOM_SCORE_ADJ_MAX; 1188 OOM_SCORE_ADJ_MAX;
1189err_sighand:
1148 unlock_task_sighand(task, &flags); 1190 unlock_task_sighand(task, &flags);
1191err_task_lock:
1192 task_unlock(task);
1149 put_task_struct(task); 1193 put_task_struct(task);
1150 return count; 1194out:
1195 return err < 0 ? err : count;
1151} 1196}
1152 1197
1153static const struct file_operations proc_oom_score_adj_operations = { 1198static const struct file_operations proc_oom_score_adj_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 4dcb88046030..41656d40dc5c 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2437,7 +2437,7 @@ static int reiserfs_write_full_page(struct page *page,
2437 /* from this point on, we know the buffer is mapped to a 2437 /* from this point on, we know the buffer is mapped to a
2438 * real block and not a direct item 2438 * real block and not a direct item
2439 */ 2439 */
2440 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 2440 if (wbc->sync_mode != WB_SYNC_NONE) {
2441 lock_buffer(bh); 2441 lock_buffer(bh);
2442 } else { 2442 } else {
2443 if (!trylock_buffer(bh)) { 2443 if (!trylock_buffer(bh)) {
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 74047304b01a..492465b451dd 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -99,6 +99,16 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
99#ifdef __ARCH_SI_TRAPNO 99#ifdef __ARCH_SI_TRAPNO
100 err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno); 100 err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno);
101#endif 101#endif
102#ifdef BUS_MCEERR_AO
103 /*
104 * Other callers might not initialize the si_lsb field,
105 * so check explicitly for the right codes here.
106 */
107 if (kinfo->si_code == BUS_MCEERR_AR ||
108 kinfo->si_code == BUS_MCEERR_AO)
109 err |= __put_user((short) kinfo->si_addr_lsb,
110 &uinfo->ssi_addr_lsb);
111#endif
102 break; 112 break;
103 case __SI_CHLD: 113 case __SI_CHLD:
104 err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); 114 err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index b552f816de15..c9af48fffcd7 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1139,8 +1139,7 @@ xfs_vm_writepage(
1139 type = IO_DELAY; 1139 type = IO_DELAY;
1140 flags = BMAPI_ALLOCATE; 1140 flags = BMAPI_ALLOCATE;
1141 1141
1142 if (wbc->sync_mode == WB_SYNC_NONE && 1142 if (wbc->sync_mode == WB_SYNC_NONE)
1143 wbc->nonblocking)
1144 flags |= BMAPI_TRYLOCK; 1143 flags |= BMAPI_TRYLOCK;
1145 } 1144 }
1146 1145