diff options
Diffstat (limited to 'fs/nfs/write.c')
| -rw-r--r-- | fs/nfs/write.c | 116 |
1 files changed, 74 insertions, 42 deletions
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index febdade91670..2867e6b7096f 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
| 13 | #include <linux/file.h> | 13 | #include <linux/file.h> |
| 14 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
| 15 | #include <linux/swap.h> | ||
| 15 | 16 | ||
| 16 | #include <linux/sunrpc/clnt.h> | 17 | #include <linux/sunrpc/clnt.h> |
| 17 | #include <linux/nfs_fs.h> | 18 | #include <linux/nfs_fs.h> |
| @@ -38,7 +39,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*, | |||
| 38 | struct page *, | 39 | struct page *, |
| 39 | unsigned int, unsigned int); | 40 | unsigned int, unsigned int); |
| 40 | static void nfs_mark_request_dirty(struct nfs_page *req); | 41 | static void nfs_mark_request_dirty(struct nfs_page *req); |
| 41 | static int nfs_wait_on_write_congestion(struct address_space *, int); | ||
| 42 | static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); | 42 | static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how); |
| 43 | static const struct rpc_call_ops nfs_write_partial_ops; | 43 | static const struct rpc_call_ops nfs_write_partial_ops; |
| 44 | static const struct rpc_call_ops nfs_write_full_ops; | 44 | static const struct rpc_call_ops nfs_write_full_ops; |
| @@ -48,8 +48,6 @@ static struct kmem_cache *nfs_wdata_cachep; | |||
| 48 | static mempool_t *nfs_wdata_mempool; | 48 | static mempool_t *nfs_wdata_mempool; |
| 49 | static mempool_t *nfs_commit_mempool; | 49 | static mempool_t *nfs_commit_mempool; |
| 50 | 50 | ||
| 51 | static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); | ||
| 52 | |||
| 53 | struct nfs_write_data *nfs_commit_alloc(void) | 51 | struct nfs_write_data *nfs_commit_alloc(void) |
| 54 | { | 52 | { |
| 55 | struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); | 53 | struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS); |
| @@ -211,6 +209,40 @@ static int wb_priority(struct writeback_control *wbc) | |||
| 211 | } | 209 | } |
| 212 | 210 | ||
| 213 | /* | 211 | /* |
| 212 | * NFS congestion control | ||
| 213 | */ | ||
| 214 | |||
| 215 | int nfs_congestion_kb; | ||
| 216 | |||
| 217 | #define NFS_CONGESTION_ON_THRESH (nfs_congestion_kb >> (PAGE_SHIFT-10)) | ||
| 218 | #define NFS_CONGESTION_OFF_THRESH \ | ||
| 219 | (NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2)) | ||
| 220 | |||
| 221 | static void nfs_set_page_writeback(struct page *page) | ||
| 222 | { | ||
| 223 | if (!test_set_page_writeback(page)) { | ||
| 224 | struct inode *inode = page->mapping->host; | ||
| 225 | struct nfs_server *nfss = NFS_SERVER(inode); | ||
| 226 | |||
| 227 | if (atomic_inc_return(&nfss->writeback) > | ||
| 228 | NFS_CONGESTION_ON_THRESH) | ||
| 229 | set_bdi_congested(&nfss->backing_dev_info, WRITE); | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | static void nfs_end_page_writeback(struct page *page) | ||
| 234 | { | ||
| 235 | struct inode *inode = page->mapping->host; | ||
| 236 | struct nfs_server *nfss = NFS_SERVER(inode); | ||
| 237 | |||
| 238 | end_page_writeback(page); | ||
| 239 | if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) { | ||
| 240 | clear_bdi_congested(&nfss->backing_dev_info, WRITE); | ||
| 241 | congestion_end(WRITE); | ||
| 242 | } | ||
| 243 | } | ||
| 244 | |||
| 245 | /* | ||
| 214 | * Find an associated nfs write request, and prepare to flush it out | 246 | * Find an associated nfs write request, and prepare to flush it out |
| 215 | * Returns 1 if there was no write request, or if the request was | 247 | * Returns 1 if there was no write request, or if the request was |
| 216 | * already tagged by nfs_set_page_dirty. Returns 0 if the request | 248 | * already tagged by nfs_set_page_dirty. Returns 0 if the request |
| @@ -247,7 +279,7 @@ static int nfs_page_mark_flush(struct page *page) | |||
| 247 | spin_unlock(req_lock); | 279 | spin_unlock(req_lock); |
| 248 | if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { | 280 | if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) { |
| 249 | nfs_mark_request_dirty(req); | 281 | nfs_mark_request_dirty(req); |
| 250 | set_page_writeback(page); | 282 | nfs_set_page_writeback(page); |
| 251 | } | 283 | } |
| 252 | ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); | 284 | ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); |
| 253 | nfs_unlock_request(req); | 285 | nfs_unlock_request(req); |
| @@ -302,13 +334,8 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) | |||
| 302 | return err; | 334 | return err; |
| 303 | } | 335 | } |
| 304 | 336 | ||
| 305 | /* | ||
| 306 | * Note: causes nfs_update_request() to block on the assumption | ||
| 307 | * that the writeback is generated due to memory pressure. | ||
| 308 | */ | ||
| 309 | int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) | 337 | int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) |
| 310 | { | 338 | { |
| 311 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
| 312 | struct inode *inode = mapping->host; | 339 | struct inode *inode = mapping->host; |
| 313 | int err; | 340 | int err; |
| 314 | 341 | ||
| @@ -317,20 +344,12 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) | |||
| 317 | err = generic_writepages(mapping, wbc); | 344 | err = generic_writepages(mapping, wbc); |
| 318 | if (err) | 345 | if (err) |
| 319 | return err; | 346 | return err; |
| 320 | while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) { | ||
| 321 | if (wbc->nonblocking) | ||
| 322 | return 0; | ||
| 323 | nfs_wait_on_write_congestion(mapping, 0); | ||
| 324 | } | ||
| 325 | err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); | 347 | err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc)); |
| 326 | if (err < 0) | 348 | if (err < 0) |
| 327 | goto out; | 349 | goto out; |
| 328 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); | 350 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, err); |
| 329 | err = 0; | 351 | err = 0; |
| 330 | out: | 352 | out: |
| 331 | clear_bit(BDI_write_congested, &bdi->state); | ||
| 332 | wake_up_all(&nfs_write_congestion); | ||
| 333 | congestion_end(WRITE); | ||
| 334 | return err; | 353 | return err; |
| 335 | } | 354 | } |
| 336 | 355 | ||
| @@ -360,7 +379,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) | |||
| 360 | } | 379 | } |
| 361 | 380 | ||
| 362 | /* | 381 | /* |
| 363 | * Insert a write request into an inode | 382 | * Remove a write request from an inode |
| 364 | */ | 383 | */ |
| 365 | static void nfs_inode_remove_request(struct nfs_page *req) | 384 | static void nfs_inode_remove_request(struct nfs_page *req) |
| 366 | { | 385 | { |
| @@ -531,10 +550,10 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, un | |||
| 531 | } | 550 | } |
| 532 | #endif | 551 | #endif |
| 533 | 552 | ||
| 534 | static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) | 553 | static int nfs_wait_on_write_congestion(struct address_space *mapping) |
| 535 | { | 554 | { |
| 555 | struct inode *inode = mapping->host; | ||
| 536 | struct backing_dev_info *bdi = mapping->backing_dev_info; | 556 | struct backing_dev_info *bdi = mapping->backing_dev_info; |
| 537 | DEFINE_WAIT(wait); | ||
| 538 | int ret = 0; | 557 | int ret = 0; |
| 539 | 558 | ||
| 540 | might_sleep(); | 559 | might_sleep(); |
| @@ -542,31 +561,23 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) | |||
| 542 | if (!bdi_write_congested(bdi)) | 561 | if (!bdi_write_congested(bdi)) |
| 543 | return 0; | 562 | return 0; |
| 544 | 563 | ||
| 545 | nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT); | 564 | nfs_inc_stats(inode, NFSIOS_CONGESTIONWAIT); |
| 546 | 565 | ||
| 547 | if (intr) { | 566 | do { |
| 548 | struct rpc_clnt *clnt = NFS_CLIENT(mapping->host); | 567 | struct rpc_clnt *clnt = NFS_CLIENT(inode); |
| 549 | sigset_t oldset; | 568 | sigset_t oldset; |
| 550 | 569 | ||
| 551 | rpc_clnt_sigmask(clnt, &oldset); | 570 | rpc_clnt_sigmask(clnt, &oldset); |
| 552 | prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE); | 571 | ret = congestion_wait_interruptible(WRITE, HZ/10); |
| 553 | if (bdi_write_congested(bdi)) { | ||
| 554 | if (signalled()) | ||
| 555 | ret = -ERESTARTSYS; | ||
| 556 | else | ||
| 557 | schedule(); | ||
| 558 | } | ||
| 559 | rpc_clnt_sigunmask(clnt, &oldset); | 572 | rpc_clnt_sigunmask(clnt, &oldset); |
| 560 | } else { | 573 | if (ret == -ERESTARTSYS) |
| 561 | prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE); | 574 | break; |
| 562 | if (bdi_write_congested(bdi)) | 575 | ret = 0; |
| 563 | schedule(); | 576 | } while (bdi_write_congested(bdi)); |
| 564 | } | 577 | |
| 565 | finish_wait(&nfs_write_congestion, &wait); | ||
| 566 | return ret; | 578 | return ret; |
| 567 | } | 579 | } |
| 568 | 580 | ||
| 569 | |||
| 570 | /* | 581 | /* |
| 571 | * Try to update any existing write request, or create one if there is none. | 582 | * Try to update any existing write request, or create one if there is none. |
| 572 | * In order to match, the request's credentials must match those of | 583 | * In order to match, the request's credentials must match those of |
| @@ -577,14 +588,15 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr) | |||
| 577 | static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, | 588 | static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, |
| 578 | struct page *page, unsigned int offset, unsigned int bytes) | 589 | struct page *page, unsigned int offset, unsigned int bytes) |
| 579 | { | 590 | { |
| 580 | struct inode *inode = page->mapping->host; | 591 | struct address_space *mapping = page->mapping; |
| 592 | struct inode *inode = mapping->host; | ||
| 581 | struct nfs_inode *nfsi = NFS_I(inode); | 593 | struct nfs_inode *nfsi = NFS_I(inode); |
| 582 | struct nfs_page *req, *new = NULL; | 594 | struct nfs_page *req, *new = NULL; |
| 583 | unsigned long rqend, end; | 595 | unsigned long rqend, end; |
| 584 | 596 | ||
| 585 | end = offset + bytes; | 597 | end = offset + bytes; |
| 586 | 598 | ||
| 587 | if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR)) | 599 | if (nfs_wait_on_write_congestion(mapping)) |
| 588 | return ERR_PTR(-ERESTARTSYS); | 600 | return ERR_PTR(-ERESTARTSYS); |
| 589 | for (;;) { | 601 | for (;;) { |
| 590 | /* Loop over all inode entries and see if we find | 602 | /* Loop over all inode entries and see if we find |
| @@ -727,7 +739,7 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
| 727 | 739 | ||
| 728 | static void nfs_writepage_release(struct nfs_page *req) | 740 | static void nfs_writepage_release(struct nfs_page *req) |
| 729 | { | 741 | { |
| 730 | end_page_writeback(req->wb_page); | 742 | nfs_end_page_writeback(req->wb_page); |
| 731 | 743 | ||
| 732 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 744 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
| 733 | if (!PageError(req->wb_page)) { | 745 | if (!PageError(req->wb_page)) { |
| @@ -1042,12 +1054,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) | |||
| 1042 | if (task->tk_status < 0) { | 1054 | if (task->tk_status < 0) { |
| 1043 | nfs_set_pageerror(page); | 1055 | nfs_set_pageerror(page); |
| 1044 | req->wb_context->error = task->tk_status; | 1056 | req->wb_context->error = task->tk_status; |
| 1045 | end_page_writeback(page); | 1057 | nfs_end_page_writeback(page); |
| 1046 | nfs_inode_remove_request(req); | 1058 | nfs_inode_remove_request(req); |
| 1047 | dprintk(", error = %d\n", task->tk_status); | 1059 | dprintk(", error = %d\n", task->tk_status); |
| 1048 | goto next; | 1060 | goto next; |
| 1049 | } | 1061 | } |
| 1050 | end_page_writeback(page); | 1062 | nfs_end_page_writeback(page); |
| 1051 | 1063 | ||
| 1052 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 1064 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
| 1053 | if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { | 1065 | if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) { |
| @@ -1514,6 +1526,26 @@ int __init nfs_init_writepagecache(void) | |||
| 1514 | if (nfs_commit_mempool == NULL) | 1526 | if (nfs_commit_mempool == NULL) |
| 1515 | return -ENOMEM; | 1527 | return -ENOMEM; |
| 1516 | 1528 | ||
| 1529 | /* | ||
| 1530 | * NFS congestion size, scale with available memory. | ||
| 1531 | * | ||
| 1532 | * 64MB: 8192k | ||
| 1533 | * 128MB: 11585k | ||
| 1534 | * 256MB: 16384k | ||
| 1535 | * 512MB: 23170k | ||
| 1536 | * 1GB: 32768k | ||
| 1537 | * 2GB: 46340k | ||
| 1538 | * 4GB: 65536k | ||
| 1539 | * 8GB: 92681k | ||
| 1540 | * 16GB: 131072k | ||
| 1541 | * | ||
| 1542 | * This allows larger machines to have larger/more transfers. | ||
| 1543 | * Limit the default to 256M | ||
| 1544 | */ | ||
| 1545 | nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
| 1546 | if (nfs_congestion_kb > 256*1024) | ||
| 1547 | nfs_congestion_kb = 256*1024; | ||
| 1548 | |||
| 1517 | return 0; | 1549 | return 0; |
| 1518 | } | 1550 | } |
| 1519 | 1551 | ||
