aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ceph/addr.c
diff options
context:
space:
mode:
author: Yehuda Sadeh <yehuda@hq.newdream.net> 2009-12-18 16:51:57 -0500
committer: Sage Weil <sage@newdream.net> 2009-12-21 19:39:56 -0500
commit: 2baba25019ec564cd247af74013873d69a0b8190 (patch)
tree: c0995b8087cff771dd51aaf1194fd238f4490f01 /fs/ceph/addr.c
parent: dbd646a851713bec5bfff40ecf624b2e78518fe5 (diff)
ceph: writeback congestion control
Set the bdi congestion bit when the amount of write data in flight exceeds an adjustable threshold.

Signed-off-by: Yehuda Sadeh <yehuda@hq.newdream.net>
Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- fs/ceph/addr.c 35
1 files changed, 33 insertions, 2 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index d0cdceb0b90b..a6850a14038e 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -47,6 +47,12 @@
47 * accounting is preserved. 47 * accounting is preserved.
48 */ 48 */
49 49
/*
 * Writeback congestion thresholds, expressed in pages.
 *
 * CONGESTION_ON_THRESH converts a limit given in kilobytes into a page
 * count by shifting right by (PAGE_SHIFT - 10).  The result is compared
 * against the per-client count of pages currently under writeback; the
 * bdi is marked congested once that count exceeds it.
 *
 * CONGESTION_OFF_THRESH is the "on" threshold minus a quarter of itself
 * (integer arithmetic).  Congestion is cleared only after the in-flight
 * count drops below this lower mark, which gives hysteresis so the
 * congested bit does not flap around a single value.
 *
 * The argument is parenthesized so that expressions built from operators
 * with lower precedence than '>>' (e.g. '|', '?:') expand correctly.
 * NOTE: CONGESTION_OFF_THRESH evaluates its argument twice; do not pass
 * an expression with side effects.
 */
#define CONGESTION_ON_THRESH(congestion_kb) \
	((congestion_kb) >> (PAGE_SHIFT - 10))
#define CONGESTION_OFF_THRESH(congestion_kb) \
	(CONGESTION_ON_THRESH(congestion_kb) - \
	 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
54
55
50 56
51/* 57/*
52 * Dirty a page. Optimistically adjust accounting, on the assumption 58 * Dirty a page. Optimistically adjust accounting, on the assumption
@@ -377,6 +383,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
377{ 383{
378 struct inode *inode; 384 struct inode *inode;
379 struct ceph_inode_info *ci; 385 struct ceph_inode_info *ci;
386 struct ceph_client *client;
380 struct ceph_osd_client *osdc; 387 struct ceph_osd_client *osdc;
381 loff_t page_off = page->index << PAGE_CACHE_SHIFT; 388 loff_t page_off = page->index << PAGE_CACHE_SHIFT;
382 int len = PAGE_CACHE_SIZE; 389 int len = PAGE_CACHE_SIZE;
@@ -384,6 +391,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
384 int err = 0; 391 int err = 0;
385 struct ceph_snap_context *snapc; 392 struct ceph_snap_context *snapc;
386 u64 snap_size = 0; 393 u64 snap_size = 0;
394 long writeback_stat;
387 395
388 dout("writepage %p idx %lu\n", page, page->index); 396 dout("writepage %p idx %lu\n", page, page->index);
389 397
@@ -393,7 +401,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
393 } 401 }
394 inode = page->mapping->host; 402 inode = page->mapping->host;
395 ci = ceph_inode(inode); 403 ci = ceph_inode(inode);
396 osdc = &ceph_inode_to_client(inode)->osdc; 404 client = ceph_inode_to_client(inode);
405 osdc = &client->osdc;
397 406
398 /* verify this is a writeable snap context */ 407 /* verify this is a writeable snap context */
399 snapc = (void *)page->private; 408 snapc = (void *)page->private;
@@ -420,6 +429,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
420 dout("writepage %p page %p index %lu on %llu~%u\n", 429 dout("writepage %p page %p index %lu on %llu~%u\n",
421 inode, page, page->index, page_off, len); 430 inode, page, page->index, page_off, len);
422 431
432 writeback_stat = atomic_long_inc_return(&client->writeback_count);
433 if (writeback_stat >
434 CONGESTION_ON_THRESH(client->mount_args->congestion_kb))
435 set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC);
436
423 set_page_writeback(page); 437 set_page_writeback(page);
424 err = ceph_osdc_writepages(osdc, ceph_vino(inode), 438 err = ceph_osdc_writepages(osdc, ceph_vino(inode),
425 &ci->i_layout, snapc, 439 &ci->i_layout, snapc,
@@ -499,6 +513,8 @@ static void writepages_finish(struct ceph_osd_request *req,
499 struct writeback_control *wbc = req->r_wbc; 513 struct writeback_control *wbc = req->r_wbc;
500 __s32 rc = -EIO; 514 __s32 rc = -EIO;
501 u64 bytes = 0; 515 u64 bytes = 0;
516 struct ceph_client *client = ceph_inode_to_client(inode);
517 long writeback_stat;
502 518
503 /* parse reply */ 519 /* parse reply */
504 replyhead = msg->front.iov_base; 520 replyhead = msg->front.iov_base;
@@ -524,6 +540,13 @@ static void writepages_finish(struct ceph_osd_request *req,
524 BUG_ON(!page); 540 BUG_ON(!page);
525 WARN_ON(!PageUptodate(page)); 541 WARN_ON(!PageUptodate(page));
526 542
543 writeback_stat =
544 atomic_long_dec_return(&client->writeback_count);
545 if (writeback_stat <
546 CONGESTION_OFF_THRESH(client->mount_args->congestion_kb))
547 clear_bdi_congested(&client->backing_dev_info,
548 BLK_RW_ASYNC);
549
527 if (i >= wrote) { 550 if (i >= wrote) {
528 dout("inode %p skipping page %p\n", inode, page); 551 dout("inode %p skipping page %p\n", inode, page);
529 wbc->pages_skipped++; 552 wbc->pages_skipped++;
@@ -666,6 +689,7 @@ retry:
666 u64 offset, len; 689 u64 offset, len;
667 struct ceph_osd_request_head *reqhead; 690 struct ceph_osd_request_head *reqhead;
668 struct ceph_osd_op *op; 691 struct ceph_osd_op *op;
692 long writeback_stat;
669 693
670 next = 0; 694 next = 0;
671 locked_pages = 0; 695 locked_pages = 0;
@@ -773,6 +797,12 @@ get_more_pages:
773 first = i; 797 first = i;
774 dout("%p will write page %p idx %lu\n", 798 dout("%p will write page %p idx %lu\n",
775 inode, page, page->index); 799 inode, page, page->index);
800
801 writeback_stat = atomic_long_inc_return(&client->writeback_count);
802 if (writeback_stat > CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) {
803 set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC);
804 }
805
776 set_page_writeback(page); 806 set_page_writeback(page);
777 req->r_pages[locked_pages] = page; 807 req->r_pages[locked_pages] = page;
778 locked_pages++; 808 locked_pages++;
@@ -998,7 +1028,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
998 struct page *page, void *fsdata) 1028 struct page *page, void *fsdata)
999{ 1029{
1000 struct inode *inode = file->f_dentry->d_inode; 1030 struct inode *inode = file->f_dentry->d_inode;
1001 struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; 1031 struct ceph_client *client = ceph_inode_to_client(inode);
1032 struct ceph_mds_client *mdsc = &client->mdsc;
1002 unsigned from = pos & (PAGE_CACHE_SIZE - 1); 1033 unsigned from = pos & (PAGE_CACHE_SIZE - 1);
1003 int check_cap = 0; 1034 int check_cap = 0;
1004 1035