author	Mel Gorman <mel@csn.ul.ie>	2010-10-26 17:21:45 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-10-26 19:52:07 -0400
commit	0e093d99763eb4cea09f8ca4f1d01f34e121d10b (patch)
tree	fad38f9c3651c81db298521141a79d9468f71986 /mm/backing-dev.c
parent	08fc468f4eaf6683bae5bdb94743a09d8630cb80 (diff)
writeback: do not sleep on the congestion queue if there are no congested BDIs or if significant congestion is not being encountered in the current zone
If congestion_wait() is called when no BDI is congested, the caller sleeps for the full timeout, which may be an unnecessary sleep. This patch adds wait_iff_congested(), which checks for congestion and sleeps only if a BDI is congested; otherwise it calls cond_resched() to ensure the caller is not hogging the CPU beyond its quota, but it does not sleep. This is aimed at reducing some of the major desktop stalls reported during IO. For example, while kswapd is operating it calls congestion_wait(), even though it may just have been reclaiming clean page cache pages with no congestion. Without this patch it would sleep for the full timeout; with this patch it simply reschedules if it has been on the CPU too long. Similar logic applies to direct reclaimers that are not making enough progress.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
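For illustration, a minimal sketch of the caller-side change this enables at a reclaim throttle point (the wrapper function below is hypothetical; congestion_wait(), wait_iff_congested(), BLK_RW_ASYNC and struct zone are from the tree this patch targets):

	/* Hypothetical throttle point in a reclaim path (illustrative only) */
	static void throttle_reclaim_example(struct zone *zone)
	{
		/*
		 * Before this patch, the throttle point would be an
		 * unconditional congestion_wait(BLK_RW_ASYNC, HZ/10),
		 * sleeping for the full timeout even with no congested BDI.
		 *
		 * With wait_iff_congested(), the caller sleeps only if some
		 * BDI is congested and @zone saw recent congestion;
		 * otherwise it just cond_resched()s and returns promptly.
		 */
		wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
	}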
Diffstat (limited to 'mm/backing-dev.c')
-rw-r--r--	mm/backing-dev.c	61
1 file changed, 59 insertions(+), 2 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 55627306abe0..5ad3c106606b 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -729,6 +729,7 @@ static wait_queue_head_t congestion_wqh[2] = {
 		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
 		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
 	};
+static atomic_t nr_bdi_congested[2];
 
 void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
 {
@@ -736,7 +737,8 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
 	wait_queue_head_t *wqh = &congestion_wqh[sync];
 
 	bit = sync ? BDI_sync_congested : BDI_async_congested;
-	clear_bit(bit, &bdi->state);
+	if (test_and_clear_bit(bit, &bdi->state))
+		atomic_dec(&nr_bdi_congested[sync]);
 	smp_mb__after_clear_bit();
 	if (waitqueue_active(wqh))
 		wake_up(wqh);
@@ -748,7 +750,8 @@ void set_bdi_congested(struct backing_dev_info *bdi, int sync)
 	enum bdi_state bit;
 
 	bit = sync ? BDI_sync_congested : BDI_async_congested;
-	set_bit(bit, &bdi->state);
+	if (!test_and_set_bit(bit, &bdi->state))
+		atomic_inc(&nr_bdi_congested[sync]);
 }
 EXPORT_SYMBOL(set_bdi_congested);
 
@@ -779,3 +782,57 @@ long congestion_wait(int sync, long timeout)
 }
 EXPORT_SYMBOL(congestion_wait);
 
+/**
+ * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes
+ * @zone: A zone to check if it is heavily congested
+ * @sync: SYNC or ASYNC IO
+ * @timeout: timeout in jiffies
+ *
+ * In the event of a congested backing_dev (any backing_dev) and the given
+ * @zone has experienced recent congestion, this waits for up to @timeout
+ * jiffies for either a BDI to exit congestion of the given @sync queue
+ * or a write to complete.
+ *
+ * In the absence of zone congestion, cond_resched() is called to yield
+ * the processor if necessary but otherwise does not sleep.
+ *
+ * The return value is 0 if the sleep is for the full timeout. Otherwise,
+ * it is the number of jiffies that were still remaining when the function
+ * returned. return_value == timeout implies the function did not sleep.
+ */
+long wait_iff_congested(struct zone *zone, int sync, long timeout)
+{
+	long ret;
+	unsigned long start = jiffies;
+	DEFINE_WAIT(wait);
+	wait_queue_head_t *wqh = &congestion_wqh[sync];
+
+	/*
+	 * If there is no congestion, or heavy congestion is not being
+	 * encountered in the current zone, yield if necessary instead
+	 * of sleeping on the congestion queue
+	 */
+	if (atomic_read(&nr_bdi_congested[sync]) == 0 ||
+			!zone_is_reclaim_congested(zone)) {
+		cond_resched();
+
+		/* In case we scheduled, work out time remaining */
+		ret = timeout - (jiffies - start);
+		if (ret < 0)
+			ret = 0;
+
+		goto out;
+	}
+
+	/* Sleep until uncongested or a write happens */
+	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
+	ret = io_schedule_timeout(timeout);
+	finish_wait(wqh, &wait);
+
+out:
+	trace_writeback_wait_iff_congested(jiffies_to_usecs(timeout),
+					jiffies_to_usecs(jiffies - start));
+
+	return ret;
+}
+EXPORT_SYMBOL(wait_iff_congested);
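One design point worth noting in the hunks above: set_bdi_congested() and clear_bdi_congested() can be invoked repeatedly for the same BDI, so the switch from set_bit()/clear_bit() to test_and_set_bit()/test_and_clear_bit() is what keeps nr_bdi_congested[] an exact count of congested BDIs rather than a count of calls. A sketch of the intended behaviour (the call sequence is illustrative only):

	/* bdi points at some congestible backing device (illustrative) */
	set_bdi_congested(bdi, BLK_RW_ASYNC);   /* bit 0->1, nr_bdi_congested[BLK_RW_ASYNC]: 0->1 */
	set_bdi_congested(bdi, BLK_RW_ASYNC);   /* bit already set, counter unchanged */
	clear_bdi_congested(bdi, BLK_RW_ASYNC); /* bit 1->0, counter 1->0, waiters woken */
	clear_bdi_congested(bdi, BLK_RW_ASYNC); /* bit already clear, counter unchanged */

With an unconditional atomic_inc()/atomic_dec() next to plain set_bit()/clear_bit(), the repeated calls would skew the counter, and wait_iff_congested() would then sleep, or skip sleeping, based on a stale view of congestion.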