diff options
author | Mel Gorman <mgorman@suse.de> | 2012-07-31 19:45:12 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 21:42:48 -0400 |
commit | a564b8f0398636ba30b07c0eaebdef7ff7837249 (patch) | |
tree | 10478aa5cfb2a3696db34618a479413b358c3831 /fs/nfs | |
parent | 29418aa4bd487c82016733ef5c6a06d656ed3c7d (diff) |
nfs: enable swap on NFS
Implement the new swapfile a_ops for NFS and hook up ->direct_IO. This
will set the NFS socket to SOCK_MEMALLOC and run socket reconnect under
PF_MEMALLOC as well as reset SOCK_MEMALLOC before engaging the protocol
->connect() method.
PF_MEMALLOC should allow the allocation of struct socket and related
objects and the early (re)setting of SOCK_MEMALLOC should allow us to
receive the packets required for the TCP connection buildup.
[jlayton@redhat.com: Restore PF_MEMALLOC task flags in all cases]
[dfeng@redhat.com: Fix handling of multiple swap files]
[a.p.zijlstra@chello.nl: Original patch]
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Eric B Munson <emunson@mgebm.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Neil Brown <neilb@suse.de>
Cc: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Xiaotian Feng <dfeng@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- | fs/nfs/Kconfig | 8 | ||||
-rw-r--r-- | fs/nfs/direct.c | 82 | ||||
-rw-r--r-- | fs/nfs/file.c | 22 |
3 files changed, 82 insertions, 30 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 404c6a8ac394..6fd5f2cdcd1e 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -86,6 +86,14 @@ config NFS_V4 | |||
86 | 86 | ||
87 | If unsure, say Y. | 87 | If unsure, say Y. |
88 | 88 | ||
89 | config NFS_SWAP | ||
90 | bool "Provide swap over NFS support" | ||
91 | default n | ||
92 | depends on NFS_FS | ||
93 | select SUNRPC_SWAP | ||
94 | help | ||
95 | This option enables swapon to work on files located on NFS mounts. | ||
96 | |||
89 | config NFS_V4_1 | 97 | config NFS_V4_1 |
90 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" | 98 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" |
91 | depends on NFS_V4 && EXPERIMENTAL | 99 | depends on NFS_V4 && EXPERIMENTAL |
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 42dce909ec70..bf9c8d0ec16a 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c | |||
@@ -115,17 +115,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq) | |||
115 | * @nr_segs: size of iovec array | 115 | * @nr_segs: size of iovec array |
116 | * | 116 | * |
117 | * The presence of this routine in the address space ops vector means | 117 | * The presence of this routine in the address space ops vector means |
118 | * the NFS client supports direct I/O. However, we shunt off direct | 118 | * the NFS client supports direct I/O. However, for most direct IO, we |
119 | * read and write requests before the VFS gets them, so this method | 119 | * shunt off direct read and write requests before the VFS gets them, |
120 | * should never be called. | 120 | * so this method is only ever called for swap. |
121 | */ | 121 | */ |
122 | ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) | 122 | ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) |
123 | { | 123 | { |
124 | #ifndef CONFIG_NFS_SWAP | ||
124 | dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", | 125 | dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", |
125 | iocb->ki_filp->f_path.dentry->d_name.name, | 126 | iocb->ki_filp->f_path.dentry->d_name.name, |
126 | (long long) pos, nr_segs); | 127 | (long long) pos, nr_segs); |
127 | 128 | ||
128 | return -EINVAL; | 129 | return -EINVAL; |
130 | #else | ||
131 | VM_BUG_ON(iocb->ki_left != PAGE_SIZE); | ||
132 | VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); | ||
133 | |||
134 | if (rw == READ || rw == KERNEL_READ) | ||
135 | return nfs_file_direct_read(iocb, iov, nr_segs, pos, | ||
136 | rw == READ ? true : false); | ||
137 | return nfs_file_direct_write(iocb, iov, nr_segs, pos, | ||
138 | rw == WRITE ? true : false); | ||
139 | #endif /* CONFIG_NFS_SWAP */ | ||
129 | } | 140 | } |
130 | 141 | ||
131 | static void nfs_direct_release_pages(struct page **pages, unsigned int npages) | 142 | static void nfs_direct_release_pages(struct page **pages, unsigned int npages) |
@@ -303,7 +314,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { | |||
303 | */ | 314 | */ |
304 | static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, | 315 | static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, |
305 | const struct iovec *iov, | 316 | const struct iovec *iov, |
306 | loff_t pos) | 317 | loff_t pos, bool uio) |
307 | { | 318 | { |
308 | struct nfs_direct_req *dreq = desc->pg_dreq; | 319 | struct nfs_direct_req *dreq = desc->pg_dreq; |
309 | struct nfs_open_context *ctx = dreq->ctx; | 320 | struct nfs_open_context *ctx = dreq->ctx; |
@@ -331,12 +342,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de | |||
331 | GFP_KERNEL); | 342 | GFP_KERNEL); |
332 | if (!pagevec) | 343 | if (!pagevec) |
333 | break; | 344 | break; |
334 | down_read(&current->mm->mmap_sem); | 345 | if (uio) { |
335 | result = get_user_pages(current, current->mm, user_addr, | 346 | down_read(&current->mm->mmap_sem); |
347 | result = get_user_pages(current, current->mm, user_addr, | ||
336 | npages, 1, 0, pagevec, NULL); | 348 | npages, 1, 0, pagevec, NULL); |
337 | up_read(&current->mm->mmap_sem); | 349 | up_read(&current->mm->mmap_sem); |
338 | if (result < 0) | 350 | if (result < 0) |
339 | break; | 351 | break; |
352 | } else { | ||
353 | WARN_ON(npages != 1); | ||
354 | result = get_kernel_page(user_addr, 1, pagevec); | ||
355 | if (WARN_ON(result != 1)) | ||
356 | break; | ||
357 | } | ||
358 | |||
340 | if ((unsigned)result < npages) { | 359 | if ((unsigned)result < npages) { |
341 | bytes = result * PAGE_SIZE; | 360 | bytes = result * PAGE_SIZE; |
342 | if (bytes <= pgbase) { | 361 | if (bytes <= pgbase) { |
@@ -386,7 +405,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de | |||
386 | static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, | 405 | static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, |
387 | const struct iovec *iov, | 406 | const struct iovec *iov, |
388 | unsigned long nr_segs, | 407 | unsigned long nr_segs, |
389 | loff_t pos) | 408 | loff_t pos, bool uio) |
390 | { | 409 | { |
391 | struct nfs_pageio_descriptor desc; | 410 | struct nfs_pageio_descriptor desc; |
392 | ssize_t result = -EINVAL; | 411 | ssize_t result = -EINVAL; |
@@ -400,7 +419,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, | |||
400 | 419 | ||
401 | for (seg = 0; seg < nr_segs; seg++) { | 420 | for (seg = 0; seg < nr_segs; seg++) { |
402 | const struct iovec *vec = &iov[seg]; | 421 | const struct iovec *vec = &iov[seg]; |
403 | result = nfs_direct_read_schedule_segment(&desc, vec, pos); | 422 | result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); |
404 | if (result < 0) | 423 | if (result < 0) |
405 | break; | 424 | break; |
406 | requested_bytes += result; | 425 | requested_bytes += result; |
@@ -426,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, | |||
426 | } | 445 | } |
427 | 446 | ||
428 | static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, | 447 | static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, |
429 | unsigned long nr_segs, loff_t pos) | 448 | unsigned long nr_segs, loff_t pos, bool uio) |
430 | { | 449 | { |
431 | ssize_t result = -ENOMEM; | 450 | ssize_t result = -ENOMEM; |
432 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 451 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
@@ -444,7 +463,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, | |||
444 | if (!is_sync_kiocb(iocb)) | 463 | if (!is_sync_kiocb(iocb)) |
445 | dreq->iocb = iocb; | 464 | dreq->iocb = iocb; |
446 | 465 | ||
447 | result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos); | 466 | result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); |
448 | if (!result) | 467 | if (!result) |
449 | result = nfs_direct_wait(dreq); | 468 | result = nfs_direct_wait(dreq); |
450 | NFS_I(inode)->read_io += result; | 469 | NFS_I(inode)->read_io += result; |
@@ -610,7 +629,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode | |||
610 | */ | 629 | */ |
611 | static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, | 630 | static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, |
612 | const struct iovec *iov, | 631 | const struct iovec *iov, |
613 | loff_t pos) | 632 | loff_t pos, bool uio) |
614 | { | 633 | { |
615 | struct nfs_direct_req *dreq = desc->pg_dreq; | 634 | struct nfs_direct_req *dreq = desc->pg_dreq; |
616 | struct nfs_open_context *ctx = dreq->ctx; | 635 | struct nfs_open_context *ctx = dreq->ctx; |
@@ -638,12 +657,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d | |||
638 | if (!pagevec) | 657 | if (!pagevec) |
639 | break; | 658 | break; |
640 | 659 | ||
641 | down_read(&current->mm->mmap_sem); | 660 | if (uio) { |
642 | result = get_user_pages(current, current->mm, user_addr, | 661 | down_read(&current->mm->mmap_sem); |
643 | npages, 0, 0, pagevec, NULL); | 662 | result = get_user_pages(current, current->mm, user_addr, |
644 | up_read(&current->mm->mmap_sem); | 664 | up_read(&current->mm->mmap_sem); |
645 | if (result < 0) | 664 | up_read(¤t->mm->mmap_sem); |
646 | break; | 665 | if (result < 0) |
666 | break; | ||
667 | } else { | ||
668 | WARN_ON(npages != 1); | ||
669 | result = get_kernel_page(user_addr, 0, pagevec); | ||
670 | if (WARN_ON(result != 1)) | ||
671 | break; | ||
672 | } | ||
647 | 673 | ||
648 | if ((unsigned)result < npages) { | 674 | if ((unsigned)result < npages) { |
649 | bytes = result * PAGE_SIZE; | 675 | bytes = result * PAGE_SIZE; |
@@ -774,7 +800,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { | |||
774 | static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | 800 | static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, |
775 | const struct iovec *iov, | 801 | const struct iovec *iov, |
776 | unsigned long nr_segs, | 802 | unsigned long nr_segs, |
777 | loff_t pos) | 803 | loff_t pos, bool uio) |
778 | { | 804 | { |
779 | struct nfs_pageio_descriptor desc; | 805 | struct nfs_pageio_descriptor desc; |
780 | struct inode *inode = dreq->inode; | 806 | struct inode *inode = dreq->inode; |
@@ -790,7 +816,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
790 | 816 | ||
791 | for (seg = 0; seg < nr_segs; seg++) { | 817 | for (seg = 0; seg < nr_segs; seg++) { |
792 | const struct iovec *vec = &iov[seg]; | 818 | const struct iovec *vec = &iov[seg]; |
793 | result = nfs_direct_write_schedule_segment(&desc, vec, pos); | 819 | result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); |
794 | if (result < 0) | 820 | if (result < 0) |
795 | break; | 821 | break; |
796 | requested_bytes += result; | 822 | requested_bytes += result; |
@@ -818,7 +844,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, | |||
818 | 844 | ||
819 | static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, | 845 | static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, |
820 | unsigned long nr_segs, loff_t pos, | 846 | unsigned long nr_segs, loff_t pos, |
821 | size_t count) | 847 | size_t count, bool uio) |
822 | { | 848 | { |
823 | ssize_t result = -ENOMEM; | 849 | ssize_t result = -ENOMEM; |
824 | struct inode *inode = iocb->ki_filp->f_mapping->host; | 850 | struct inode *inode = iocb->ki_filp->f_mapping->host; |
@@ -836,7 +862,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
836 | if (!is_sync_kiocb(iocb)) | 862 | if (!is_sync_kiocb(iocb)) |
837 | dreq->iocb = iocb; | 863 | dreq->iocb = iocb; |
838 | 864 | ||
839 | result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos); | 865 | result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); |
840 | if (!result) | 866 | if (!result) |
841 | result = nfs_direct_wait(dreq); | 867 | result = nfs_direct_wait(dreq); |
842 | out_release: | 868 | out_release: |
@@ -867,7 +893,7 @@ out: | |||
867 | * cache. | 893 | * cache. |
868 | */ | 894 | */ |
869 | ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, | 895 | ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, |
870 | unsigned long nr_segs, loff_t pos) | 896 | unsigned long nr_segs, loff_t pos, bool uio) |
871 | { | 897 | { |
872 | ssize_t retval = -EINVAL; | 898 | ssize_t retval = -EINVAL; |
873 | struct file *file = iocb->ki_filp; | 899 | struct file *file = iocb->ki_filp; |
@@ -892,7 +918,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, | |||
892 | 918 | ||
893 | task_io_account_read(count); | 919 | task_io_account_read(count); |
894 | 920 | ||
895 | retval = nfs_direct_read(iocb, iov, nr_segs, pos); | 921 | retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio); |
896 | if (retval > 0) | 922 | if (retval > 0) |
897 | iocb->ki_pos = pos + retval; | 923 | iocb->ki_pos = pos + retval; |
898 | 924 | ||
@@ -923,7 +949,7 @@ out: | |||
923 | * is no atomic O_APPEND write facility in the NFS protocol. | 949 | * is no atomic O_APPEND write facility in the NFS protocol. |
924 | */ | 950 | */ |
925 | ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | 951 | ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, |
926 | unsigned long nr_segs, loff_t pos) | 952 | unsigned long nr_segs, loff_t pos, bool uio) |
927 | { | 953 | { |
928 | ssize_t retval = -EINVAL; | 954 | ssize_t retval = -EINVAL; |
929 | struct file *file = iocb->ki_filp; | 955 | struct file *file = iocb->ki_filp; |
@@ -955,7 +981,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
955 | 981 | ||
956 | task_io_account_write(count); | 982 | task_io_account_write(count); |
957 | 983 | ||
958 | retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); | 984 | retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio); |
959 | if (retval > 0) { | 985 | if (retval > 0) { |
960 | struct inode *inode = mapping->host; | 986 | struct inode *inode = mapping->host; |
961 | 987 | ||
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index acd4e4cd2906..50fb83a88b1b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -175,7 +175,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, | |||
175 | ssize_t result; | 175 | ssize_t result; |
176 | 176 | ||
177 | if (iocb->ki_filp->f_flags & O_DIRECT) | 177 | if (iocb->ki_filp->f_flags & O_DIRECT) |
178 | return nfs_file_direct_read(iocb, iov, nr_segs, pos); | 178 | return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); |
179 | 179 | ||
180 | dprintk("NFS: read(%s/%s, %lu@%lu)\n", | 180 | dprintk("NFS: read(%s/%s, %lu@%lu)\n", |
181 | dentry->d_parent->d_name.name, dentry->d_name.name, | 181 | dentry->d_parent->d_name.name, dentry->d_name.name, |
@@ -482,6 +482,20 @@ static int nfs_launder_page(struct page *page) | |||
482 | return nfs_wb_page(inode, page); | 482 | return nfs_wb_page(inode, page); |
483 | } | 483 | } |
484 | 484 | ||
485 | #ifdef CONFIG_NFS_SWAP | ||
486 | static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, | ||
487 | sector_t *span) | ||
488 | { | ||
489 | *span = sis->pages; | ||
490 | return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1); | ||
491 | } | ||
492 | |||
493 | static void nfs_swap_deactivate(struct file *file) | ||
494 | { | ||
495 | xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0); | ||
496 | } | ||
497 | #endif | ||
498 | |||
485 | const struct address_space_operations nfs_file_aops = { | 499 | const struct address_space_operations nfs_file_aops = { |
486 | .readpage = nfs_readpage, | 500 | .readpage = nfs_readpage, |
487 | .readpages = nfs_readpages, | 501 | .readpages = nfs_readpages, |
@@ -496,6 +510,10 @@ const struct address_space_operations nfs_file_aops = { | |||
496 | .migratepage = nfs_migrate_page, | 510 | .migratepage = nfs_migrate_page, |
497 | .launder_page = nfs_launder_page, | 511 | .launder_page = nfs_launder_page, |
498 | .error_remove_page = generic_error_remove_page, | 512 | .error_remove_page = generic_error_remove_page, |
513 | #ifdef CONFIG_NFS_SWAP | ||
514 | .swap_activate = nfs_swap_activate, | ||
515 | .swap_deactivate = nfs_swap_deactivate, | ||
516 | #endif | ||
499 | }; | 517 | }; |
500 | 518 | ||
501 | /* | 519 | /* |
@@ -570,7 +588,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
570 | size_t count = iov_length(iov, nr_segs); | 588 | size_t count = iov_length(iov, nr_segs); |
571 | 589 | ||
572 | if (iocb->ki_filp->f_flags & O_DIRECT) | 590 | if (iocb->ki_filp->f_flags & O_DIRECT) |
573 | return nfs_file_direct_write(iocb, iov, nr_segs, pos); | 591 | return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); |
574 | 592 | ||
575 | dprintk("NFS: write(%s/%s, %lu@%Ld)\n", | 593 | dprintk("NFS: write(%s/%s, %lu@%Ld)\n", |
576 | dentry->d_parent->d_name.name, dentry->d_name.name, | 594 | dentry->d_parent->d_name.name, dentry->d_name.name, |