diff options
-rw-r--r-- | fs/nfs/super.c | 4 | ||||
-rw-r--r-- | fs/nfs/sysctl.c | 8 | ||||
-rw-r--r-- | fs/nfs/write.c | 116 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 1 | ||||
-rw-r--r-- | include/linux/nfs_fs.h | 1 | ||||
-rw-r--r-- | include/linux/nfs_fs_sb.h | 1 | ||||
-rw-r--r-- | mm/backing-dev.c | 16 |
7 files changed, 103 insertions, 44 deletions
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index bb516a2cfbaf..f1eae44b9a1a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -151,10 +151,10 @@ int __init register_nfs_fs(void) | |||
151 | if (ret < 0) | 151 | if (ret < 0) |
152 | goto error_0; | 152 | goto error_0; |
153 | 153 | ||
154 | #ifdef CONFIG_NFS_V4 | ||
155 | ret = nfs_register_sysctl(); | 154 | ret = nfs_register_sysctl(); |
156 | if (ret < 0) | 155 | if (ret < 0) |
157 | goto error_1; | 156 | goto error_1; |
157 | #ifdef CONFIG_NFS_V4 | ||
158 | ret = register_filesystem(&nfs4_fs_type); | 158 | ret = register_filesystem(&nfs4_fs_type); |
159 | if (ret < 0) | 159 | if (ret < 0) |
160 | goto error_2; | 160 | goto error_2; |
@@ -165,9 +165,9 @@ int __init register_nfs_fs(void) | |||
165 | #ifdef CONFIG_NFS_V4 | 165 | #ifdef CONFIG_NFS_V4 |
166 | error_2: | 166 | error_2: |
167 | nfs_unregister_sysctl(); | 167 | nfs_unregister_sysctl(); |
168 | #endif | ||
168 | error_1: | 169 | error_1: |
169 | unregister_filesystem(&nfs_fs_type); | 170 | unregister_filesystem(&nfs_fs_type); |
170 | #endif | ||
171 | error_0: | 171 | error_0: |
172 | return ret; | 172 | return ret; |
173 | } | 173 | } |
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index fcdcafbb3293..b62481dabae9 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -50,6 +50,14 @@ static ctl_table nfs_cb_sysctls[] = { | |||
50 | .proc_handler = &proc_dointvec_jiffies, | 50 | .proc_handler = &proc_dointvec_jiffies, |
51 | .strategy = &sysctl_jiffies, | 51 | .strategy = &sysctl_jiffies, |
52 | }, | 52 | }, |
53 | { | ||
54 | .ctl_name = CTL_UNNUMBERED, | ||
55 | .procname = "nfs_congestion_kb", | ||
56 | .data = &nfs_congestion_kb, | ||
57 | .maxlen = sizeof(nfs_congestion_kb), | ||
58 | .mode = 0644, | ||
59 | .proc_handler = &proc_dointvec, | ||
60 | }, | ||
53 | { .ctl_name = 0 } | 61 | { .ctl_name = 0 } |
54 | }; | 62 | }; |
55 | 63 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index febdade91670..2867e6b7096f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/file.h> | 13 | #include <linux/file.h> |
14 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
15 | #include <linux/swap.h> | ||
15 | 16 | ||
16 | #include <linux/sunrpc/clnt.h> | 17 | #include <linux/sunrpc/clnt.h> |
17 | #include <linux/nfs_fs.h> | 18 | #include <linux/nfs_fs.h> |
@@ -38,7 +39,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, | |||
38 | struct page *, | 39 | struct page *, |
39 | unsigned int, unsigned int); | 40 | unsigned int, unsigned int); |
40 | static void nfs_mark_request_dirty(struct nfs_page *req); | 41 | static void nfs_mark_request_dirty(struct nfs_page *req); |
41 | static int nfs_wait_on_write_congestion(struct address_space *, int); | ||
42 | static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); | 42 | static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); |
43 | static const struct rpc_call_ops nfs_write_partial_ops; | 43 | static const struct rpc_call_ops nfs_write_partial_ops; |
44 | static const struct rpc_call_ops nfs_write_full_ops; | 44 | static const struct rpc_call_ops nfs_write_full_ops; |
@@ -48,8 +48,6 @@ static struct kmem_cache *nfs_wdata_cachep; | |||
48 | static mempool_t *nfs_wdata_mempool; | 48 | static mempool_t *nfs_wdata_mempool; |
49 | static mempool_t *nfs_commit_mempool; | 49 | static mempool_t *nfs_commit_mempool; |
50 | 50 | ||
51 | static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); | ||
52 | |||
53 | struct nfs_write_data *nfs_commit_alloc(void) | 51 | struct nfs_write_data *nfs_commit_alloc(void) |
54 | { | 52 | { |
55 | struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); | 53 | struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); |
@@ -211,6 +209,40 @@ static int wb_priority(struct writeback_control *wbc) | |||
211 | } | 209 | } |
212 | 210 | ||
213 | /* | 211 | /* |
212 | * NFS congestion control | ||
213 | */ | ||
214 | |||
215 | int nfs_congestion_kb; | ||
216 | |||
217 | #define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10)) | ||
218 | #define NFS_CONGESTION_OFF_THRESH \ | ||
219 | (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) | ||
220 | |||
221 | static void nfs_set_page_writeback(struct page *page) | ||
222 | { | ||
223 | if (!test_set_page_writeback(page)) { | ||
224 | struct inode *inode = page->mapping->host; | ||
225 | struct nfs_server *nfss = NFS_SERVER(inode); | ||
226 | |||
227 | if (atomic_inc_return(&nfss->writeback) > | ||
228 | NFS_CONGESTION_ON_THRESH) | ||
229 | set_bdi_congested(&nfss->backing_dev_info, WRITE); | ||
230 | } | ||
231 | } | ||
232 | |||
233 | static void nfs_end_page_writeback(struct page *page) | ||
234 | { | ||
235 | struct inode *inode = page->mapping->host; | ||
236 | struct nfs_server *nfss = NFS_SERVER(inode); | ||
237 | |||
238 | end_page_writeback(page); | ||
239 | if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { | ||
240 | clear_bdi_congested(&nfss->backing_dev_info, WRITE); | ||
241 | congestion_end(WRITE); | ||
242 | } | ||
243 | } | ||
244 | |||
245 | /* | ||
214 | * Find an associated nfs write request, and prepare to flush it out | 246 | * Find an associated nfs write request, and prepare to flush it out |
215 | * Returns 1 if there was no write request, or if the request was | 247 | * Returns 1 if there was no write request, or if the request was |
216 | * already tagged by nfs_set_page_dirty.Returns 0 if the request | 248 | * already tagged by nfs_set_page_dirty.Returns 0 if the request |
@@ -247,7 +279,7 @@ static int nfs_page_mark_flush(struct page *page) | |||
247 | spin_unlock(req_lock); | 279 | spin_unlock(req_lock); |
248 | if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { | 280 | if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { |
249 | nfs_mark_request_dirty(req); | 281 | nfs_mark_request_dirty(req); |
250 | set_page_writeback(page); | 282 | nfs_set_page_writeback(page); |
251 | } | 283 | } |
252 | ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); | 284 | ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); |
253 | nfs_unlock_request(req); | 285 | nfs_unlock_request(req); |
@@ -302,13 +334,8 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) | |||
302 | return err; | 334 | return err; |
303 | } | 335 | } |
304 | 336 | ||
305 | /* | ||
306 | * Note: causes nfs_update_request() to block on the assumption | ||
307 | * that the writeback is generated due to memory pressure. | ||
308 | */ | ||
309 | int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) | 337 | int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) |
310 | { | 338 | { |
311 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
312 | struct inode *inode = mapping->host; | 339 | struct inode *inode = mapping->host; |
313 | int err; | 340 | int err; |
314 | 341 | ||
@@ -317,20 +344,12 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) | |||
317 | err = generic_writepages(mapping, wbc); | 344 | err = generic_writepages(mapping, wbc); |
318 | if (err) | 345 | if (err) |
319 | return err; | 346 | return err; |
320 | while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) { | ||
321 | if (wbc->nonblocking) | ||
322 | return 0; | ||
323 | nfs_wait_on_write_congestion(mapping, 0); | ||
324 | } | ||
325 | err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); | 347 | err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); |
326 | if (err < 0) | 348 | if (err < 0) |
327 | goto out; | 349 | goto out; |
328 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); | 350 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); |
329 | err = 0; | 351 | err = 0; |
330 | out: | 352 | out: |
331 | clear_bit(BDI_write_congested, &bdi->state); | ||
332 | wake_up_all(&nfs_write_congestion); | ||
333 | congestion_end(WRITE); | ||
334 | return err; | 353 | return err; |
335 | } | 354 | } |
336 | 355 | ||
@@ -360,7 +379,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
360 | } | 379 | } |
361 | 380 | ||
362 | /* | 381 | /* |
363 | * Insert a write request into an inode | 382 | * Remove a write request from an inode |
364 | */ | 383 | */ |
365 | static void nfs_inode_remove_request(struct nfs_page *req) | 384 | static void nfs_inode_remove_request(struct nfs_page *req) |
366 | { | 385 | { |
@@ -531,10 +550,10 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, un | |||
531 | } | 550 | } |
532 | #endif | 551 | #endif |
533 | 552 | ||
534 | static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) | 553 | static int nfs_wait_on_write_congestion(struct address_space *mapping) |
535 | { | 554 | { |
555 | struct inode *inode = mapping->host; | ||
536 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 556 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
537 | DEFINE_WAIT(wait); | ||
538 | int ret = 0; | 557 | int ret = 0; |
539 | 558 | ||
540 | might_sleep(); | 559 | might_sleep(); |
@@ -542,31 +561,23 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) | |||
542 | if (!bdi_write_congested(bdi)) | 561 | if (!bdi_write_congested(bdi)) |
543 | return 0; | 562 | return 0; |
544 | 563 | ||
545 | nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT); | 564 | nfs_inc_stats(inode, NFSIOS_CONGESTIONWAIT); |
546 | 565 | ||
547 | if (intr) { | 566 | do { |
548 | struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); | 567 | struct rpc_clnt *clnt = NFS_CLIENT(inode); |
549 | sigset_t oldset; | 568 | sigset_t oldset; |
550 | 569 | ||
551 | rpc_clnt_sigmask(clnt, &oldset); | 570 | rpc_clnt_sigmask(clnt, &oldset); |
552 | prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE); | 571 | ret = congestion_wait_interruptible(WRITE, HZ/10); |
553 | if (bdi_write_congested(bdi)) { | ||
554 | if (signalled()) | ||
555 | ret = -ERESTARTSYS; | ||
556 | else | ||
557 | schedule(); | ||
558 | } | ||
559 | rpc_clnt_sigunmask(clnt, &oldset); | 572 | rpc_clnt_sigunmask(clnt, &oldset); |
560 | } else { | 573 | if (ret == -ERESTARTSYS) |
561 | prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE); | 574 | break; |
562 | if (bdi_write_congested(bdi)) | 575 | ret = 0; |
563 | schedule(); | 576 | } while (bdi_write_congested(bdi)); |
564 | } | 577 | |
565 | finish_wait(&nfs_write_congestion, &wait); | ||
566 | return ret; | 578 | return ret; |
567 | } | 579 | } |
568 | 580 | ||
569 | |||
570 | /* | 581 | /* |
571 | * Try to update any existing write request, or create one if there is none. | 582 | * Try to update any existing write request, or create one if there is none. |
572 | * In order to match, the request's credentials must match those of | 583 | * In order to match, the request's credentials must match those of |
@@ -577,14 +588,15 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) | |||
577 | static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, | 588 | static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, |
578 | struct page *page, unsigned int offset, unsigned int bytes) | 589 | struct page *page, unsigned int offset, unsigned int bytes) |
579 | { | 590 | { |
580 | struct inode *inode = page->mapping->host; | 591 | struct address_space *mapping = page->mapping; |
592 | struct inode *inode = mapping->host; | ||
581 | struct nfs_inode *nfsi = NFS_I(inode); | 593 | struct nfs_inode *nfsi = NFS_I(inode); |
582 | struct nfs_page *req, *new = NULL; | 594 | struct nfs_page *req, *new = NULL; |
583 | unsigned long rqend, end; | 595 | unsigned long rqend, end; |
584 | 596 | ||
585 | end = offset + bytes; | 597 | end = offset + bytes; |
586 | 598 | ||
587 | if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR)) | 599 | if (nfs_wait_on_write_congestion(mapping)) |
588 | return ERR_PTR(-ERESTARTSYS); | 600 | return ERR_PTR(-ERESTARTSYS); |
589 | for (;;) { | 601 | for (;;) { |
590 | /* Loop over all inode entries and see if we find | 602 | /* Loop over all inode entries and see if we find |
@@ -727,7 +739,7 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
727 | 739 | ||
728 | static void nfs_writepage_release(struct nfs_page *req) | 740 | static void nfs_writepage_release(struct nfs_page *req) |
729 | { | 741 | { |
730 | end_page_writeback(req->wb_page); | 742 | nfs_end_page_writeback(req->wb_page); |
731 | 743 | ||
732 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 744 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
733 | if (!PageError(req->wb_page)) { | 745 | if (!PageError(req->wb_page)) { |
@@ -1042,12 +1054,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) | |||
1042 | if (task->tk_status < 0) { | 1054 | if (task->tk_status < 0) { |
1043 | nfs_set_pageerror(page); | 1055 | nfs_set_pageerror(page); |
1044 | req->wb_context->error = task->tk_status; | 1056 | req->wb_context->error = task->tk_status; |
1045 | end_page_writeback(page); | 1057 | nfs_end_page_writeback(page); |
1046 | nfs_inode_remove_request(req); | 1058 | nfs_inode_remove_request(req); |
1047 | dprintk(", error = %d\n", task->tk_status); | 1059 | dprintk(", error = %d\n", task->tk_status); |
1048 | goto next; | 1060 | goto next; |
1049 | } | 1061 | } |
1050 | end_page_writeback(page); | 1062 | nfs_end_page_writeback(page); |
1051 | 1063 | ||
1052 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 1064 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
1053 | if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { | 1065 | if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { |
@@ -1514,6 +1526,26 @@ int __init nfs_init_writepagecache(void) | |||
1514 | if (nfs_commit_mempool == NULL) | 1526 | if (nfs_commit_mempool == NULL) |
1515 | return -ENOMEM; | 1527 | return -ENOMEM; |
1516 | 1528 | ||
1529 | /* | ||
1530 | * NFS congestion size, scale with available memory. | ||
1531 | * | ||
1532 | * 64MB: 8192k | ||
1533 | * 128MB: 11585k | ||
1534 | * 256MB: 16384k | ||
1535 | * 512MB: 23170k | ||
1536 | * 1GB: 32768k | ||
1537 | * 2GB: 46340k | ||
1538 | * 4GB: 65536k | ||
1539 | * 8GB: 92681k | ||
1540 | * 16GB: 131072k | ||
1541 | * | ||
1542 | * This allows larger machines to have larger/more transfers. | ||
1543 | * Limit the default to 256M | ||
1544 | */ | ||
1545 | nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
1546 | if (nfs_congestion_kb > 256*1024) | ||
1547 | nfs_congestion_kb = 256*1024; | ||
1548 | |||
1517 | return 0; | 1549 | return 0; |
1518 | } | 1550 | } |
1519 | 1551 | ||
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 7011d6255593..f2542c24b328 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -93,6 +93,7 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) | |||
93 | void clear_bdi_congested(struct backing_dev_info *bdi, int rw); | 93 | void clear_bdi_congested(struct backing_dev_info *bdi, int rw); |
94 | void set_bdi_congested(struct backing_dev_info *bdi, int rw); | 94 | void set_bdi_congested(struct backing_dev_info *bdi, int rw); |
95 | long congestion_wait(int rw, long timeout); | 95 | long congestion_wait(int rw, long timeout); |
96 | long congestion_wait_interruptible(int rw, long timeout); | ||
96 | void congestion_end(int rw); | 97 | void congestion_end(int rw); |
97 | 98 | ||
98 | #define bdi_cap_writeback_dirty(bdi) \ | 99 | #define bdi_cap_writeback_dirty(bdi) \ |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 47aaa2c66738..e9ae0c6e2c62 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -415,6 +415,7 @@ extern void nfs_complete_unlink(struct dentry *); | |||
415 | /* | 415 | /* |
416 | * linux/fs/nfs/write.c | 416 | * linux/fs/nfs/write.c |
417 | */ | 417 | */ |
418 | extern int nfs_congestion_kb; | ||
418 | extern int nfs_writepage(struct page *page, struct writeback_control *wbc); | 419 | extern int nfs_writepage(struct page *page, struct writeback_control *wbc); |
419 | extern int nfs_writepages(struct address_space *, struct writeback_control *); | 420 | extern int nfs_writepages(struct address_space *, struct writeback_control *); |
420 | extern int nfs_flush_incompatible(struct file *file, struct page *page); | 421 | extern int nfs_flush_incompatible(struct file *file, struct page *page); |
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 95796e6924f1..c95d5e642548 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -82,6 +82,7 @@ struct nfs_server { | |||
82 | struct rpc_clnt * client_acl; /* ACL RPC client handle */ | 82 | struct rpc_clnt * client_acl; /* ACL RPC client handle */ |
83 | struct nfs_iostats * io_stats; /* I/O statistics */ | 83 | struct nfs_iostats * io_stats; /* I/O statistics */ |
84 | struct backing_dev_info backing_dev_info; | 84 | struct backing_dev_info backing_dev_info; |
85 | atomic_t writeback; /* number of writeback pages */ | ||
85 | int flags; /* various flags */ | 86 | int flags; /* various flags */ |
86 | unsigned int caps; /* server capabilities */ | 87 | unsigned int caps; /* server capabilities */ |
87 | unsigned int rsize; /* read size */ | 88 | unsigned int rsize; /* read size */ |
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f50a2811f9dc..e5de3781d3fe 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -55,6 +55,22 @@ long congestion_wait(int rw, long timeout) | |||
55 | } | 55 | } |
56 | EXPORT_SYMBOL(congestion_wait); | 56 | EXPORT_SYMBOL(congestion_wait); |
57 | 57 | ||
58 | long congestion_wait_interruptible(int rw, long timeout) | ||
59 | { | ||
60 | long ret; | ||
61 | DEFINE_WAIT(wait); | ||
62 | wait_queue_head_t *wqh = &congestion_wqh[rw]; | ||
63 | |||
64 | prepare_to_wait(wqh, &wait, TASK_INTERRUPTIBLE); | ||
65 | if (signal_pending(current)) | ||
66 | ret = -ERESTARTSYS; | ||
67 | else | ||
68 | ret = io_schedule_timeout(timeout); | ||
69 | finish_wait(wqh, &wait); | ||
70 | return ret; | ||
71 | } | ||
72 | EXPORT_SYMBOL(congestion_wait_interruptible); | ||
73 | |||
58 | /** | 74 | /** |
59 | * congestion_end - wake up sleepers on a congested backing_dev_info | 75 | * congestion_end - wake up sleepers on a congested backing_dev_info |
60 | * @rw: READ or WRITE | 76 | * @rw: READ or WRITE |