aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2015-05-22 17:13:44 -0400
committer Jens Axboe <axboe@fb.com> 2015-06-02 10:33:35 -0400
commit 703c270887bb5106c4c46a00cc7477d30d5e04f5 (patch)
tree ded33e42162008646f8252b05b1a22085abf9527
parent 482cf79cdf6669667a914ffd4cbc57a762b55fef (diff)
writeback: implement and use inode_congested()
In several places, bdi_congested() and its wrappers are used to determine whether more IOs should be issued. With cgroup writeback support, this question can't be answered solely based on the bdi (backing_dev_info). It depends on whether the filesystem and bdi support cgroup writeback and on the blkcg the inode is associated with. This patch implements inode_congested() and its wrappers, which take @inode and determine the congestion state considering cgroup writeback. The new functions replace bdi_*congested() calls in places where the query is about a specific inode and task. There are several filesystem users which also fit these criteria but they should be updated when each filesystem implements cgroup writeback support. v2: Now that a given inode is associated with only one wb, congestion state can be determined independently of the asking task. Drop @task. Spotted by Vivek. Also, converted to take @inode instead of @mapping and renamed to inode_congested(). Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Jens Axboe <axboe@kernel.dk> Cc: Jan Kara <jack@suse.cz> Cc: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r-- fs/fs-writeback.c 29
-rw-r--r-- include/linux/backing-dev.h 22
-rw-r--r-- mm/fadvise.c 2
-rw-r--r-- mm/readahead.c 2
-rw-r--r-- mm/vmscan.c 11
5 files changed, 58 insertions, 8 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 99a2440c5588..7ec491b1be04 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -142,6 +142,35 @@ static void __wb_start_writeback(struct bdi_writeback *wb, long nr_pages,
142 wb_queue_work(wb, work); 142 wb_queue_work(wb, work);
143} 143}
144 144
145#ifdef CONFIG_CGROUP_WRITEBACK
146
147/**
148 * inode_congested - test whether an inode is congested
149 * @inode: inode to test for congestion
150 * @cong_bits: mask of WB_[a]sync_congested bits to test
151 *
152 * Tests whether @inode is congested. @cong_bits is the mask of congestion
153 * bits to test and the return value is the mask of set bits.
154 *
155 * If cgroup writeback is enabled for @inode, the congestion state is
156 * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg
157 * associated with @inode is congested; otherwise, the root wb's congestion
158 * state is used.
159 */
160int inode_congested(struct inode *inode, int cong_bits)
161{
162 if (inode) {
163 struct bdi_writeback *wb = inode_to_wb(inode);
164 if (wb)
165 return wb_congested(wb, cong_bits);
166 }
167
168 return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
169}
170EXPORT_SYMBOL_GPL(inode_congested);
171
172#endif /* CONFIG_CGROUP_WRITEBACK */
173
145/** 174/**
146 * bdi_start_writeback - start writeback 175 * bdi_start_writeback - start writeback
147 * @bdi: the backing device to write from 176 * @bdi: the backing device to write from
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 2c498a2a8268..6f0882105f95 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -230,6 +230,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
230void __inode_attach_wb(struct inode *inode, struct page *page); 230void __inode_attach_wb(struct inode *inode, struct page *page);
231void wb_memcg_offline(struct mem_cgroup *memcg); 231void wb_memcg_offline(struct mem_cgroup *memcg);
232void wb_blkcg_offline(struct blkcg *blkcg); 232void wb_blkcg_offline(struct blkcg *blkcg);
233int inode_congested(struct inode *inode, int cong_bits);
233 234
234/** 235/**
235 * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode 236 * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
@@ -438,8 +439,29 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg)
438{ 439{
439} 440}
440 441
442static inline int inode_congested(struct inode *inode, int cong_bits)
443{
444 return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
445}
446
441#endif /* CONFIG_CGROUP_WRITEBACK */ 447#endif /* CONFIG_CGROUP_WRITEBACK */
442 448
449static inline int inode_read_congested(struct inode *inode)
450{
451 return inode_congested(inode, 1 << WB_sync_congested);
452}
453
454static inline int inode_write_congested(struct inode *inode)
455{
456 return inode_congested(inode, 1 << WB_async_congested);
457}
458
459static inline int inode_rw_congested(struct inode *inode)
460{
461 return inode_congested(inode, (1 << WB_sync_congested) |
462 (1 << WB_async_congested));
463}
464
443static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits) 465static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
444{ 466{
445 return wb_congested(&bdi->wb, cong_bits); 467 return wb_congested(&bdi->wb, cong_bits);
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 4a3907cf79f8..b8a5bc66b0c0 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -115,7 +115,7 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
115 case POSIX_FADV_NOREUSE: 115 case POSIX_FADV_NOREUSE:
116 break; 116 break;
117 case POSIX_FADV_DONTNEED: 117 case POSIX_FADV_DONTNEED:
118 if (!bdi_write_congested(bdi)) 118 if (!inode_write_congested(mapping->host))
119 __filemap_fdatawrite_range(mapping, offset, endbyte, 119 __filemap_fdatawrite_range(mapping, offset, endbyte,
120 WB_SYNC_NONE); 120 WB_SYNC_NONE);
121 121
diff --git a/mm/readahead.c b/mm/readahead.c
index 935675844b2e..60cd846a9a44 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -541,7 +541,7 @@ page_cache_async_readahead(struct address_space *mapping,
541 /* 541 /*
542 * Defer asynchronous read-ahead on IO congestion. 542 * Defer asynchronous read-ahead on IO congestion.
543 */ 543 */
544 if (bdi_read_congested(inode_to_bdi(mapping->host))) 544 if (inode_read_congested(mapping->host))
545 return; 545 return;
546 546
547 /* do read-ahead */ 547 /* do read-ahead */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7582f9fcda92..f46339870147 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -452,14 +452,13 @@ static inline int is_page_cache_freeable(struct page *page)
452 return page_count(page) - page_has_private(page) == 2; 452 return page_count(page) - page_has_private(page) == 2;
453} 453}
454 454
455static int may_write_to_queue(struct backing_dev_info *bdi, 455static int may_write_to_inode(struct inode *inode, struct scan_control *sc)
456 struct scan_control *sc)
457{ 456{
458 if (current->flags & PF_SWAPWRITE) 457 if (current->flags & PF_SWAPWRITE)
459 return 1; 458 return 1;
460 if (!bdi_write_congested(bdi)) 459 if (!inode_write_congested(inode))
461 return 1; 460 return 1;
462 if (bdi == current->backing_dev_info) 461 if (inode_to_bdi(inode) == current->backing_dev_info)
463 return 1; 462 return 1;
464 return 0; 463 return 0;
465} 464}
@@ -538,7 +537,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
538 } 537 }
539 if (mapping->a_ops->writepage == NULL) 538 if (mapping->a_ops->writepage == NULL)
540 return PAGE_ACTIVATE; 539 return PAGE_ACTIVATE;
541 if (!may_write_to_queue(inode_to_bdi(mapping->host), sc)) 540 if (!may_write_to_inode(mapping->host, sc))
542 return PAGE_KEEP; 541 return PAGE_KEEP;
543 542
544 if (clear_page_dirty_for_io(page)) { 543 if (clear_page_dirty_for_io(page)) {
@@ -924,7 +923,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
924 */ 923 */
925 mapping = page_mapping(page); 924 mapping = page_mapping(page);
926 if (((dirty || writeback) && mapping && 925 if (((dirty || writeback) && mapping &&
927 bdi_write_congested(inode_to_bdi(mapping->host))) || 926 inode_write_congested(mapping->host)) ||
928 (writeback && PageReclaim(page))) 927 (writeback && PageReclaim(page)))
929 nr_congested++; 928 nr_congested++;
930 929