about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author Mel Gorman <mgorman@suse.de> 2012-07-31 19:45:12 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-07-31 21:42:48 -0400
commit a564b8f0398636ba30b07c0eaebdef7ff7837249 (patch)
tree 10478aa5cfb2a3696db34618a479413b358c3831 /fs
parent 29418aa4bd487c82016733ef5c6a06d656ed3c7d (diff)
nfs: enable swap on NFS
Implement the new swapfile a_ops for NFS and hook up ->direct_IO. This will set the NFS socket to SOCK_MEMALLOC and run socket reconnect under PF_MEMALLOC as well as reset SOCK_MEMALLOC before engaging the protocol ->connect() method.

PF_MEMALLOC should allow the allocation of struct socket and related objects and the early (re)setting of SOCK_MEMALLOC should allow us to receive the packets required for the TCP connection buildup.

[jlayton@redhat.com: Restore PF_MEMALLOC task flags in all cases]
[dfeng@redhat.com: Fix handling of multiple swap files]
[a.p.zijlstra@chello.nl: Original patch]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/nfs/Kconfig 8
-rw-r--r-- fs/nfs/direct.c 82
-rw-r--r-- fs/nfs/file.c 22
3 files changed, 82 insertions, 30 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 404c6a8ac394..6fd5f2cdcd1e 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -86,6 +86,14 @@ config NFS_V4
86 86
87 If unsure, say Y. 87 If unsure, say Y.
88 88
89config NFS_SWAP
90 bool "Provide swap over NFS support"
91 default n
92 depends on NFS_FS
93 select SUNRPC_SWAP
94 help
95 This option enables swapon to work on files located on NFS mounts.
96
89config NFS_V4_1 97config NFS_V4_1
90 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" 98 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
91 depends on NFS_V4 && EXPERIMENTAL 99 depends on NFS_V4 && EXPERIMENTAL
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 42dce909ec70..bf9c8d0ec16a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -115,17 +115,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
115 * @nr_segs: size of iovec array 115 * @nr_segs: size of iovec array
116 * 116 *
117 * The presence of this routine in the address space ops vector means 117 * The presence of this routine in the address space ops vector means
118 * the NFS client supports direct I/O. However, we shunt off direct 118 * the NFS client supports direct I/O. However, for most direct IO, we
119 * read and write requests before the VFS gets them, so this method 119 * shunt off direct read and write requests before the VFS gets them,
120 * should never be called. 120 * so this method is only ever called for swap.
121 */ 121 */
122ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) 122ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
123{ 123{
124#ifndef CONFIG_NFS_SWAP
124 dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", 125 dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
125 iocb->ki_filp->f_path.dentry->d_name.name, 126 iocb->ki_filp->f_path.dentry->d_name.name,
126 (long long) pos, nr_segs); 127 (long long) pos, nr_segs);
127 128
128 return -EINVAL; 129 return -EINVAL;
130#else
131 VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
132 VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
133
134 if (rw == READ || rw == KERNEL_READ)
135 return nfs_file_direct_read(iocb, iov, nr_segs, pos,
136 rw == READ ? true : false);
137 return nfs_file_direct_write(iocb, iov, nr_segs, pos,
138 rw == WRITE ? true : false);
139#endif /* CONFIG_NFS_SWAP */
129} 140}
130 141
131static void nfs_direct_release_pages(struct page **pages, unsigned int npages) 142static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -303,7 +314,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
303 */ 314 */
304static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, 315static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
305 const struct iovec *iov, 316 const struct iovec *iov,
306 loff_t pos) 317 loff_t pos, bool uio)
307{ 318{
308 struct nfs_direct_req *dreq = desc->pg_dreq; 319 struct nfs_direct_req *dreq = desc->pg_dreq;
309 struct nfs_open_context *ctx = dreq->ctx; 320 struct nfs_open_context *ctx = dreq->ctx;
@@ -331,12 +342,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
331 GFP_KERNEL); 342 GFP_KERNEL);
332 if (!pagevec) 343 if (!pagevec)
333 break; 344 break;
334 down_read(&current->mm->mmap_sem); 345 if (uio) {
335 result = get_user_pages(current, current->mm, user_addr, 346 down_read(&current->mm->mmap_sem);
347 result = get_user_pages(current, current->mm, user_addr,
336 npages, 1, 0, pagevec, NULL); 348 npages, 1, 0, pagevec, NULL);
337 up_read(&current->mm->mmap_sem); 349 up_read(&current->mm->mmap_sem);
338 if (result < 0) 350 if (result < 0)
339 break; 351 break;
352 } else {
353 WARN_ON(npages != 1);
354 result = get_kernel_page(user_addr, 1, pagevec);
355 if (WARN_ON(result != 1))
356 break;
357 }
358
340 if ((unsigned)result < npages) { 359 if ((unsigned)result < npages) {
341 bytes = result * PAGE_SIZE; 360 bytes = result * PAGE_SIZE;
342 if (bytes <= pgbase) { 361 if (bytes <= pgbase) {
@@ -386,7 +405,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
386static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, 405static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
387 const struct iovec *iov, 406 const struct iovec *iov,
388 unsigned long nr_segs, 407 unsigned long nr_segs,
389 loff_t pos) 408 loff_t pos, bool uio)
390{ 409{
391 struct nfs_pageio_descriptor desc; 410 struct nfs_pageio_descriptor desc;
392 ssize_t result = -EINVAL; 411 ssize_t result = -EINVAL;
@@ -400,7 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
400 419
401 for (seg = 0; seg < nr_segs; seg++) { 420 for (seg = 0; seg < nr_segs; seg++) {
402 const struct iovec *vec = &iov[seg]; 421 const struct iovec *vec = &iov[seg];
403 result = nfs_direct_read_schedule_segment(&desc, vec, pos); 422 result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
404 if (result < 0) 423 if (result < 0)
405 break; 424 break;
406 requested_bytes += result; 425 requested_bytes += result;
@@ -426,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
426} 445}
427 446
428static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, 447static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
429 unsigned long nr_segs, loff_t pos) 448 unsigned long nr_segs, loff_t pos, bool uio)
430{ 449{
431 ssize_t result = -ENOMEM; 450 ssize_t result = -ENOMEM;
432 struct inode *inode = iocb->ki_filp->f_mapping->host; 451 struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -444,7 +463,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
444 if (!is_sync_kiocb(iocb)) 463 if (!is_sync_kiocb(iocb))
445 dreq->iocb = iocb; 464 dreq->iocb = iocb;
446 465
447 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); 466 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
448 if (!result) 467 if (!result)
449 result = nfs_direct_wait(dreq); 468 result = nfs_direct_wait(dreq);
450 NFS_I(inode)->read_io += result; 469 NFS_I(inode)->read_io += result;
@@ -610,7 +629,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
610 */ 629 */
611static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, 630static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
612 const struct iovec *iov, 631 const struct iovec *iov,
613 loff_t pos) 632 loff_t pos, bool uio)
614{ 633{
615 struct nfs_direct_req *dreq = desc->pg_dreq; 634 struct nfs_direct_req *dreq = desc->pg_dreq;
616 struct nfs_open_context *ctx = dreq->ctx; 635 struct nfs_open_context *ctx = dreq->ctx;
@@ -638,12 +657,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
638 if (!pagevec) 657 if (!pagevec)
639 break; 658 break;
640 659
641 down_read(&current->mm->mmap_sem); 660 if (uio) {
642 result = get_user_pages(current, current->mm, user_addr, 661 down_read(&current->mm->mmap_sem);
643 npages, 0, 0, pagevec, NULL); 662 result = get_user_pages(current, current->mm, user_addr,
644 up_read(&current->mm->mmap_sem); 663 npages, 0, 0, pagevec, NULL);
645 if (result < 0) 664 up_read(&current->mm->mmap_sem);
646 break; 665 if (result < 0)
666 break;
667 } else {
668 WARN_ON(npages != 1);
669 result = get_kernel_page(user_addr, 0, pagevec);
670 if (WARN_ON(result != 1))
671 break;
672 }
647 673
648 if ((unsigned)result < npages) { 674 if ((unsigned)result < npages) {
649 bytes = result * PAGE_SIZE; 675 bytes = result * PAGE_SIZE;
@@ -774,7 +800,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
774static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 800static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
775 const struct iovec *iov, 801 const struct iovec *iov,
776 unsigned long nr_segs, 802 unsigned long nr_segs,
777 loff_t pos) 803 loff_t pos, bool uio)
778{ 804{
779 struct nfs_pageio_descriptor desc; 805 struct nfs_pageio_descriptor desc;
780 struct inode *inode = dreq->inode; 806 struct inode *inode = dreq->inode;
@@ -790,7 +816,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
790 816
791 for (seg = 0; seg < nr_segs; seg++) { 817 for (seg = 0; seg < nr_segs; seg++) {
792 const struct iovec *vec = &iov[seg]; 818 const struct iovec *vec = &iov[seg];
793 result = nfs_direct_write_schedule_segment(&desc, vec, pos); 819 result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
794 if (result < 0) 820 if (result < 0)
795 break; 821 break;
796 requested_bytes += result; 822 requested_bytes += result;
@@ -818,7 +844,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
818 844
819static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, 845static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
820 unsigned long nr_segs, loff_t pos, 846 unsigned long nr_segs, loff_t pos,
821 size_t count) 847 size_t count, bool uio)
822{ 848{
823 ssize_t result = -ENOMEM; 849 ssize_t result = -ENOMEM;
824 struct inode *inode = iocb->ki_filp->f_mapping->host; 850 struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -836,7 +862,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
836 if (!is_sync_kiocb(iocb)) 862 if (!is_sync_kiocb(iocb))
837 dreq->iocb = iocb; 863 dreq->iocb = iocb;
838 864
839 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos); 865 result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
840 if (!result) 866 if (!result)
841 result = nfs_direct_wait(dreq); 867 result = nfs_direct_wait(dreq);
842out_release: 868out_release:
@@ -867,7 +893,7 @@ out:
867 * cache. 893 * cache.
868 */ 894 */
869ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, 895ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
870 unsigned long nr_segs, loff_t pos) 896 unsigned long nr_segs, loff_t pos, bool uio)
871{ 897{
872 ssize_t retval = -EINVAL; 898 ssize_t retval = -EINVAL;
873 struct file *file = iocb->ki_filp; 899 struct file *file = iocb->ki_filp;
@@ -892,7 +918,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
892 918
893 task_io_account_read(count); 919 task_io_account_read(count);
894 920
895 retval = nfs_direct_read(iocb, iov, nr_segs, pos); 921 retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
896 if (retval > 0) 922 if (retval > 0)
897 iocb->ki_pos = pos + retval; 923 iocb->ki_pos = pos + retval;
898 924
@@ -923,7 +949,7 @@ out:
923 * is no atomic O_APPEND write facility in the NFS protocol. 949 * is no atomic O_APPEND write facility in the NFS protocol.
924 */ 950 */
925ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, 951ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
926 unsigned long nr_segs, loff_t pos) 952 unsigned long nr_segs, loff_t pos, bool uio)
927{ 953{
928 ssize_t retval = -EINVAL; 954 ssize_t retval = -EINVAL;
929 struct file *file = iocb->ki_filp; 955 struct file *file = iocb->ki_filp;
@@ -955,7 +981,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
955 981
956 task_io_account_write(count); 982 task_io_account_write(count);
957 983
958 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); 984 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
959 if (retval > 0) { 985 if (retval > 0) {
960 struct inode *inode = mapping->host; 986 struct inode *inode = mapping->host;
961 987
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index acd4e4cd2906..50fb83a88b1b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -175,7 +175,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
175 ssize_t result; 175 ssize_t result;
176 176
177 if (iocb->ki_filp->f_flags & O_DIRECT) 177 if (iocb->ki_filp->f_flags & O_DIRECT)
178 return nfs_file_direct_read(iocb, iov, nr_segs, pos); 178 return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
179 179
180 dprintk("NFS: read(%s/%s, %lu@%lu)\n", 180 dprintk("NFS: read(%s/%s, %lu@%lu)\n",
181 dentry->d_parent->d_name.name, dentry->d_name.name, 181 dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -482,6 +482,20 @@ static int nfs_launder_page(struct page *page)
482 return nfs_wb_page(inode, page); 482 return nfs_wb_page(inode, page);
483} 483}
484 484
485#ifdef CONFIG_NFS_SWAP
486static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
487 sector_t *span)
488{
489 *span = sis->pages;
490 return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1);
491}
492
493static void nfs_swap_deactivate(struct file *file)
494{
495 xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0);
496}
497#endif
498
485const struct address_space_operations nfs_file_aops = { 499const struct address_space_operations nfs_file_aops = {
486 .readpage = nfs_readpage, 500 .readpage = nfs_readpage,
487 .readpages = nfs_readpages, 501 .readpages = nfs_readpages,
@@ -496,6 +510,10 @@ const struct address_space_operations nfs_file_aops = {
496 .migratepage = nfs_migrate_page, 510 .migratepage = nfs_migrate_page,
497 .launder_page = nfs_launder_page, 511 .launder_page = nfs_launder_page,
498 .error_remove_page = generic_error_remove_page, 512 .error_remove_page = generic_error_remove_page,
513#ifdef CONFIG_NFS_SWAP
514 .swap_activate = nfs_swap_activate,
515 .swap_deactivate = nfs_swap_deactivate,
516#endif
499}; 517};
500 518
501/* 519/*
@@ -570,7 +588,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
570 size_t count = iov_length(iov, nr_segs); 588 size_t count = iov_length(iov, nr_segs);
571 589
572 if (iocb->ki_filp->f_flags & O_DIRECT) 590 if (iocb->ki_filp->f_flags & O_DIRECT)
573 return nfs_file_direct_write(iocb, iov, nr_segs, pos); 591 return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
574 592
575 dprintk("NFS: write(%s/%s, %lu@%Ld)\n", 593 dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
576 dentry->d_parent->d_name.name, dentry->d_name.name, 594 dentry->d_parent->d_name.name, dentry->d_name.name,